From 7eaebe7d503c3ef240ac7b3efc5433fe647c0298 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: removed unused #include 's Removed unused #include 's in btrfs Signed-off-by: Huang Weiyi Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 81a313874ae..37e12f62039 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include -- cgit From 57506d50ed6db7b0e7ddc9845e86e81f140983d5 Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: check return value for kthread_run() correctly kthread_run() returns the kthread or ERR_PTR(-ENOMEM), not NULL. Signed-off-by: Qinghuang Feng Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 37e12f62039..0d8ccd625ba 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1739,13 +1739,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); - if (!fs_info->cleaner_kthread) + if (IS_ERR(fs_info->cleaner_kthread)) goto fail_csum_root; fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, "btrfs-transaction"); - if (!fs_info->transaction_kthread) + if (IS_ERR(fs_info->transaction_kthread)) goto fail_cleaner; if (btrfs_super_log_root(disk_super) != 0) { -- cgit From c6e308713a47527f88a277ee95b7c5d1db80f77b Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Wed, 21 Jan 2009 10:59:08 -0500 Subject: Btrfs: simplify iteration codes Merge list_for_each* and list_entry to list_for_each_entry* Signed-off-by: Qinghuang Feng Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d8ccd625ba..26a18779e84 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1135,7 +1135,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) { struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; int ret = 0; - struct list_head *cur; struct btrfs_device *device; struct backing_dev_info *bdi; #if 0 @@ -1143,8 +1142,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) btrfs_congested_async(info, 0)) return 1; #endif - list_for_each(cur, &info->fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { if (!device->bdev) continue; bdi = blk_get_backing_dev_info(device->bdev); @@ -1162,13 +1160,11 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) */ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { - struct list_head *cur; struct btrfs_device *device; struct btrfs_fs_info *info; info = (struct btrfs_fs_info *)bdi->unplug_io_data; - list_for_each(cur, &info->fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { if (!device->bdev) continue; @@ -1994,7 +1990,6 @@ static int write_dev_supers(struct btrfs_device *device, int write_all_supers(struct btrfs_root *root, int max_mirrors) { - struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; struct btrfs_super_block *sb; @@ -2010,8 +2005,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) sb = &root->fs_info->super_for_commit; dev_item = &sb->dev_item; - list_for_each(cur, head) { - dev = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) { total_errors++; continue; @@ -2044,8 +2038,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) } total_errors = 0; - list_for_each(cur, head) { - dev = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) continue; if (!dev->in_fs_metadata || !dev->writeable) -- cgit From 7e6628544abad773222d8b177f738ac2db1859de Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: open_ctree() error handling can oops on fs_info a bug in open_ctree: struct btrfs_root *open_ctree(..) { .... if (!extent_root || !tree_root || !fs_info || !chunk_root || !dev_root || !csum_root) { err = -ENOMEM; goto fail; //When code flow goes to "fail", fs_info may be NULL or uninitialized. } .... fail: btrfs_close_devices(fs_info->fs_devices);// ! btrfs_mapping_tree_free(&fs_info->mapping_tree);// ! kfree(extent_root); kfree(tree_root); bdi_destroy(&fs_info->bdi);// ! ... ) Signed-off-by: Qinghuang Feng Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 26a18779e84..3cf17257f89 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1823,13 +1823,14 @@ fail_sb_buffer: fail_iput: invalidate_inode_pages2(fs_info->btree_inode->i_mapping); iput(fs_info->btree_inode); -fail: + btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); + bdi_destroy(&fs_info->bdi); +fail: kfree(extent_root); kfree(tree_root); - bdi_destroy(&fs_info->bdi); kfree(fs_info); kfree(chunk_root); kfree(dev_root); -- cgit From 7237f1833601dcc435a64176c2c347ec4bd959f9 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 21 Jan 2009 12:54:03 -0500 Subject: Btrfs: fix tree logs parallel sync To improve performance, btrfs_sync_log merges tree log sync requests. But it wrongly merges sync requests for different tree logs. If multiple tree logs are synced at the same time, only one of them actually gets synced. This patch has following changes to fix the bug: Move most tree log related fields in btrfs_fs_info to btrfs_root. This allows merging sync requests separately for each tree log. Don't insert root item into the log root tree immediately after log tree is allocated. Root item for log tree is inserted when log tree get synced for the first time. This allows syncing the log root tree without first syncing all log trees. At tree-log sync, btrfs_sync_log first sync the log tree; then updates corresponding root item in the log root tree; sync the log root tree; then update the super block. Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3cf17257f89..7feac5a475e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -849,6 +849,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); + init_waitqueue_head(&root->log_writer_wait); + init_waitqueue_head(&root->log_commit_wait[0]); + init_waitqueue_head(&root->log_commit_wait[1]); + atomic_set(&root->log_commit[0], 0); + atomic_set(&root->log_commit[1], 0); + atomic_set(&root->log_writers, 0); + root->log_batch = 0; + root->log_transid = 0; extent_io_tree_init(&root->dirty_log_pages, fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -933,15 +941,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, return 0; } -int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; + struct extent_buffer *leaf; root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) - return -ENOMEM; + return ERR_PTR(-ENOMEM); __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, @@ -950,12 +959,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; + /* + * log trees do not get reference counted because they go away + * before a real commit is actually done. They do store pointers + * to file data extents, and those reference counts still get + * updated (along with back refs to the log tree). + */ root->ref_cows = 0; - root->node = btrfs_alloc_free_block(trans, root, root->leafsize, - 0, BTRFS_TREE_LOG_OBJECTID, - trans->transid, 0, 0, 0); + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + 0, BTRFS_TREE_LOG_OBJECTID, + trans->transid, 0, 0, 0); + if (IS_ERR(leaf)) { + kfree(root); + return ERR_CAST(leaf); + } + root->node = leaf; btrfs_set_header_nritems(root->node, 0); btrfs_set_header_level(root->node, 0); btrfs_set_header_bytenr(root->node, root->node->start); @@ -967,7 +987,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); - fs_info->log_root_tree = root; + return root; +} + +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *log_root; + + log_root = alloc_log_tree(trans, fs_info); + if (IS_ERR(log_root)) + return PTR_ERR(log_root); + WARN_ON(fs_info->log_root_tree); + fs_info->log_root_tree = log_root; + return 0; +} + +int btrfs_add_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_root *log_root; + struct btrfs_inode_item *inode_item; + + log_root = alloc_log_tree(trans, root->fs_info); + if (IS_ERR(log_root)) + return PTR_ERR(log_root); + + log_root->last_trans = trans->transid; + log_root->root_key.offset = root->root_key.objectid; + + inode_item = &log_root->root_item.inode; + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nbytes = cpu_to_le64(root->leafsize); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + + btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); + btrfs_set_root_generation(&log_root->root_item, trans->transid); + + WARN_ON(root->log_root); + root->log_root = log_root; + root->log_transid = 0; return 0; } @@ -1530,10 +1591,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); - init_waitqueue_head(&fs_info->tree_log_wait); - atomic_set(&fs_info->tree_log_commit, 0); - atomic_set(&fs_info->tree_log_writers, 0); - fs_info->tree_log_transid = 0; __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); -- cgit From b51912c91fcf7581cc7b4550f1bb96422809d9ed Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Feb 2009 09:23:24 -0500 Subject: Btrfs: async threads should try harder to find work Tracing shows the delay between when an async thread goes to sleep and when more work is added is often very short. This commit adds a little bit of delay and extra checking to the code right before we schedule out. It allows more work to be added to the worker without requiring notifications from other procs. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7feac5a475e..9c381004797 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1679,6 +1679,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, * low idle thresh */ fs_info->endio_workers.idle_thresh = 4; + fs_info->endio_meta_workers.idle_thresh = 4; + fs_info->endio_write_workers.idle_thresh = 64; fs_info->endio_meta_write_workers.idle_thresh = 64; -- cgit From c487685d7c18a8481900755aa5c56a7a74193101 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Feb 2009 09:24:25 -0500 Subject: Btrfs: hash_lock is no longer needed Before metadata is written to disk, it is updated to reflect that writeout has begun. Once this update is done, the block must be cow'd before it can be modified again. This update was originally synchronized by using a per-fs spinlock. Today the buffers for the metadata blocks are locked before writeout begins, and everyone that tests the flag has the buffer locked as well. So, the per-fs spinlock (called hash_lock for no good reason) is no longer required. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9c381004797..549271607c1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1503,7 +1503,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->delalloc_inodes); - spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->ref_cache_lock); @@ -2361,7 +2360,6 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) int btree_lock_page_hook(struct page *page) { struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_buffer *eb; unsigned long len; @@ -2376,9 +2374,7 @@ int btree_lock_page_hook(struct page *page) goto out; btrfs_tree_lock(eb); - spin_lock(&root->fs_info->hash_lock); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - spin_unlock(&root->fs_info->hash_lock); btrfs_tree_unlock(eb); free_extent_buffer(eb); out: -- cgit From b4ce94de9b4d64e8ab3cf155d13653c666e22b9b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Feb 2009 09:25:08 -0500 Subject: Btrfs: Change btree locking to use explicit blocking points Most of the btrfs metadata operations can be protected by a spinlock, but some operations still need to schedule. So far, btrfs has been using a mutex along with a trylock loop, most of the time it is able to avoid going for the full mutex, so the trylock loop is a big performance gain. This commit is step one for getting rid of the blocking locks entirely. btrfs_tree_lock takes a spinlock, and the code explicitly switches to a blocking lock when it starts an operation that can schedule. We'll be able get rid of the blocking locks in smaller pieces over time. Tracing allows us to find the most common cause of blocking, so we can start with the hot spots first. The basic idea is: btrfs_tree_lock() returns with the spin lock held btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in the extent buffer flags, and then drops the spin lock. The buffer is still considered locked by all of the btrfs code. If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops the spin lock and waits on a wait queue for the blocking bit to go away. Much of the code that needs to set the blocking bit finishes without actually blocking a good percentage of the time. So, an adaptive spin is still used against the blocking bit to avoid very high context switch rates. btrfs_clear_lock_blocking() clears the blocking bit and returns with the spinlock held again. btrfs_tree_unlock() can be called on either blocking or spinning locks, it does the right thing based on the blocking bit. ctree.c has a helper function to set/clear all the locked buffers in a path as blocking. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 549271607c1..5aebddd7119 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -799,7 +799,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) - buf->flags |= EXTENT_UPTODATE; + set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); else WARN_ON(1); return buf; @@ -813,6 +813,10 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) { WARN_ON(!btrfs_tree_locked(buf)); + + /* ugh, clear_extent_buffer_dirty can be expensive */ + btrfs_set_lock_blocking(buf); + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -2311,6 +2315,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; + btrfs_set_lock_blocking(buf); + WARN_ON(!btrfs_tree_locked(buf)); if (transid != root->fs_info->generation) { printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " @@ -2353,7 +2359,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) int ret; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) - buf->flags |= EXTENT_UPTODATE; + set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); return ret; } -- cgit