From 93c0d86371a5b2e68473752a6e54ff03185c473e Mon Sep 17 00:00:00 2001 From: "Solofo.Ramangalahy@bull.net" <> Date: Wed, 26 Nov 2008 23:44:10 -0500 Subject: ext4: When resizing set the EXT4_BG_INODE_ZEROED flag for new block groups The inode table has been zeroed in setup_new_group_blocks(). Mark it as such in ext4_group_add(). Since we are currently clearing inode table for the new block group, we should set the EXT4_BG_INODE_ZEROED flag. If at some point in the future we don't immediately zero out the inode table as part of the resize operation, then obviously we shouldn't do this. Signed-off-by: Solofo.Ramangalahy@bull.net Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b6ec1843a01..d448eb1d9ba 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -864,6 +864,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); + gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); /* -- cgit From 0390131ba84fd3f726f9e24fc4553828125700bb Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Wed, 7 Jan 2009 00:06:22 -0500 Subject: ext4: Allow ext4 to run without a journal A few weeks ago I posted a patch for discussion that allowed ext4 to run without a journal. Since that time I've integrated the excellent comments from Andreas and fixed several serious bugs. We're currently running with this patch and generating some performance numbers against both ext2 (with backported reservations code) and ext4 with and without a journal. It just so happens that running without a journal is slightly faster for most everything. We did iozone -T -t 4 s 2g -r 256k -T -I -i0 -i1 -i2 which creates 4 threads, each of which create and do reads and writes on a 2G file, with a buffer size of 256K, using O_DIRECT for all file opens to bypass the page cache. Results: ext2 ext4, default ext4, no journal initial writes 13.0 MB/s 15.4 MB/s 15.7 MB/s rewrites 13.1 MB/s 15.6 MB/s 15.9 MB/s reads 15.2 MB/s 16.9 MB/s 17.2 MB/s re-reads 15.3 MB/s 16.9 MB/s 17.2 MB/s random readers 5.6 MB/s 5.6 MB/s 5.7 MB/s random writers 5.1 MB/s 5.3 MB/s 5.4 MB/s So it seems that, so far, this was a useful exercise. Signed-off-by: Frank Mayhar Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index d448eb1d9ba..1665aa131d1 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -149,7 +149,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh, { int err; - if (handle->h_buffer_credits >= thresh) + if (ext4_handle_has_enough_credits(handle, thresh)) return 0; err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA); @@ -232,7 +232,7 @@ static int setup_new_group_blocks(struct super_block *sb, memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); set_buffer_uptodate(gdb); unlock_buffer(gdb); - ext4_journal_dirty_metadata(handle, gdb); + ext4_handle_dirty_metadata(handle, NULL, gdb); ext4_set_bit(bit, bh->b_data); brelse(gdb); } @@ -251,7 +251,7 @@ static int setup_new_group_blocks(struct super_block *sb, err = PTR_ERR(bh); goto exit_bh; } - ext4_journal_dirty_metadata(handle, gdb); + ext4_handle_dirty_metadata(handle, NULL, gdb); ext4_set_bit(bit, bh->b_data); brelse(gdb); } @@ -276,7 +276,7 @@ static int setup_new_group_blocks(struct super_block *sb, err = PTR_ERR(it); goto exit_bh; } - ext4_journal_dirty_metadata(handle, it); + ext4_handle_dirty_metadata(handle, NULL, it); brelse(it); ext4_set_bit(bit, bh->b_data); } @@ -286,7 +286,7 @@ static int setup_new_group_blocks(struct super_block *sb, mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), bh->b_data); - ext4_journal_dirty_metadata(handle, bh); + ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); /* Mark unused entries in inode bitmap used */ @@ -299,7 +299,7 @@ static int setup_new_group_blocks(struct super_block *sb, mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), bh->b_data); - ext4_journal_dirty_metadata(handle, bh); + ext4_handle_dirty_metadata(handle, NULL, bh); exit_bh: brelse(bh); @@ -486,12 +486,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, * reserved inode, and will become GDT blocks (primary and backup). */ data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; - ext4_journal_dirty_metadata(handle, dind); + ext4_handle_dirty_metadata(handle, NULL, dind); brelse(dind); inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; ext4_mark_iloc_dirty(handle, inode, &iloc); memset((*primary)->b_data, 0, sb->s_blocksize); - ext4_journal_dirty_metadata(handle, *primary); + ext4_handle_dirty_metadata(handle, NULL, *primary); o_group_desc = EXT4_SB(sb)->s_group_desc; memcpy(n_group_desc, o_group_desc, @@ -502,7 +502,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, kfree(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); - ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); return 0; @@ -618,7 +618,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, primary[i]->b_blocknr, gdbackups, blk + primary[i]->b_blocknr); */ data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); - err2 = ext4_journal_dirty_metadata(handle, primary[i]); + err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]); if (!err) err = err2; } @@ -676,7 +676,8 @@ static void update_backups(struct super_block *sb, struct buffer_head *bh; /* Out of journal space, and can't get more - abort - so sad */ - if (handle->h_buffer_credits == 0 && + if (ext4_handle_valid(handle) && + handle->h_buffer_credits == 0 && ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) && (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) break; @@ -696,7 +697,7 @@ static void update_backups(struct super_block *sb, memset(bh->b_data + size, 0, rest); set_buffer_uptodate(bh); unlock_buffer(bh); - ext4_journal_dirty_metadata(handle, bh); + ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); } if ((err2 = ext4_journal_stop(handle)) && !err) @@ -916,7 +917,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) /* Update the global fs size fields */ sbi->s_groups_count++; - ext4_journal_dirty_metadata(handle, primary); + ext4_handle_dirty_metadata(handle, NULL, primary); /* Update the reserved block counts only once the new group is * active. */ @@ -938,7 +939,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) EXT4_INODES_PER_GROUP(sb); } - ext4_journal_dirty_metadata(handle, sbi->s_sbh); + ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); sb->s_dirt = 1; exit_journal: @@ -1072,7 +1073,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, goto exit_put; } ext4_blocks_count_set(es, o_blocks_count + add); - ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); sb->s_dirt = 1; unlock_super(sb); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, -- cgit From fde4d95ad8711c84a36735a17136c45b19746af9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 5 Jan 2009 22:17:35 -0500 Subject: ext4: remove extraneous newlines from calls to ext4_error() and ext4_warning() This removes annoying blank syslog entries emitted by ext4_error() or ext4_warning(), since these functions add their own newline. Signed-off-by: Nick Warne Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 1665aa131d1..41133811744 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -762,13 +762,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (ext4_blocks_count(es) + input->blocks_count < ext4_blocks_count(es)) { - ext4_warning(sb, __func__, "blocks_count overflow\n"); + ext4_warning(sb, __func__, "blocks_count overflow"); return -EINVAL; } if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < le32_to_cpu(es->s_inodes_count)) { - ext4_warning(sb, __func__, "inodes_count overflow\n"); + ext4_warning(sb, __func__, "inodes_count overflow"); return -EINVAL; } @@ -999,8 +999,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, " too large to resize to %llu blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) - ext4_warning(sb, __func__, - "CONFIG_LBD not enabled\n"); + ext4_warning(sb, __func__, "CONFIG_LBD not enabled"); return -EINVAL; } -- cgit From a9df9a49102f3578909cba7bd33784eb3b9caaa4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 5 Jan 2009 22:18:16 -0500 Subject: ext4: Make ext4_group_t be an unsigned int Nearly all places in the ext3/4 code which uses "unsigned long" is probably a bug, since on 32-bit systems a ulong a 32-bits, which means we are wasting stack space on 64-bit systems. Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 41133811744..1865d6a53de 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -50,7 +50,7 @@ static int verify_group_input(struct super_block *sb, ext4_get_group_no_and_offset(sb, start, NULL, &offset); if (group != sbi->s_groups_count) ext4_warning(sb, __func__, - "Cannot add at group %u (only %lu groups)", + "Cannot add at group %u (only %u groups)", input->group, sbi->s_groups_count); else if (offset != 0) ext4_warning(sb, __func__, "Last group not full"); @@ -716,7 +716,7 @@ static void update_backups(struct super_block *sb, exit_err: if (err) { ext4_warning(sb, __func__, - "can't update backup for group %lu (err %d), " + "can't update backup for group %u (err %d), " "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT4_VALID_FS; sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); -- cgit From e21675d4b63975d09eb75c443c48ebe663d23e18 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 5 Jan 2009 21:36:02 -0500 Subject: ext4: Add blocks added during resize to bitmap With this change new blocks added during resize are marked as free in the block bitmap and the group is flagged with EXT4_GROUP_INFO_NEED_INIT_BIT flag. This makes sure when mballoc tries to allocate blocks from the new group we would reload the buddy information using the bitmap present in the disk. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/resize.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 1865d6a53de..526db73701b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -977,9 +977,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, struct buffer_head *bh; handle_t *handle; int err; - unsigned long freed_blocks; ext4_group_t group; - struct ext4_group_info *grp; /* We don't need to worry about locking wrt other resizers just * yet: we're going to revalidate es->s_blocks_count after @@ -1077,7 +1075,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, unlock_super(sb); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); - ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); + /* We add the blocks to the bitmap and set the group need init bit */ + ext4_add_groupblocks(handle, sb, o_blocks_count, add); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); if ((err = ext4_journal_stop(handle))) @@ -1120,12 +1119,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ClearPageUptodate(page); page_cache_release(page); } - - /* Get the info on the last group */ - grp = ext4_get_group_info(sb, group); - - /* Update free blocks in group info */ - ext4_mb_update_group_info(grp, add); } if (test_opt(sb, DEBUG)) -- cgit From 920313a726e04fef0f2c0bcb04ad8229c0e700d8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 5 Jan 2009 21:36:19 -0500 Subject: ext4: Use EXT4_GROUP_INFO_NEED_INIT_BIT during resize The new groups added during resize are flagged as need_init group. Make sure we properly initialize these groups. When we have block size < page size and we are adding new groups the page may still be marked uptodate even though we haven't initialized the group. While forcing the init of buddy cache we need to make sure other groups part of the same page of buddy cache is not using the cache. group_info->alloc_sem is added to ensure the same. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" cc: stable@kernel.org --- fs/ext4/resize.c | 49 ++++++++----------------------------------------- 1 file changed, 8 insertions(+), 41 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 526db73701b..92034d2c8a7 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -748,6 +748,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) struct inode *inode = NULL; handle_t *handle; int gdb_off, gdb_num; + int num_grp_locked = 0; int err, err2; gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); @@ -788,6 +789,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) } } + if ((err = verify_group_input(sb, input))) goto exit_put; @@ -856,6 +858,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * using the new disk blocks. */ + num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group); /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)((char *)primary->b_data + gdb_off * EXT4_DESC_SIZE(sb)); @@ -872,9 +875,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * We can allocate memory for mb_alloc based on the new group * descriptor */ - err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); - if (err) + err = ext4_mb_add_groupinfo(sb, input->group, gdp); + if (err) { + ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); goto exit_journal; + } /* * Make the new blocks and inodes valid next. We do this before @@ -916,6 +921,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) /* Update the global fs size fields */ sbi->s_groups_count++; + ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); ext4_handle_dirty_metadata(handle, NULL, primary); @@ -1082,45 +1088,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if ((err = ext4_journal_stop(handle))) goto exit_put; - /* - * Mark mballoc pages as not up to date so that they will be updated - * next time they are loaded by ext4_mb_load_buddy. - * - * XXX Bad, Bad, BAD!!! We should not be overloading the - * Uptodate flag, particularly on thte bitmap bh, as way of - * hinting to ext4_mb_load_buddy() that it needs to be - * overloaded. A user could take a LVM snapshot, then do an - * on-line fsck, and clear the uptodate flag, and this would - * not be a bug in userspace, but a bug in the kernel. FIXME!!! - */ - { - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct inode *inode = sbi->s_buddy_cache; - int blocks_per_page; - int block; - int pnum; - struct page *page; - - /* Set buddy page as not up to date */ - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; - block = group * 2; - pnum = block / blocks_per_page; - page = find_get_page(inode->i_mapping, pnum); - if (page != NULL) { - ClearPageUptodate(page); - page_cache_release(page); - } - - /* Set bitmap page as not up to date */ - block++; - pnum = block / blocks_per_page; - page = find_get_page(inode->i_mapping, pnum); - if (page != NULL) { - ClearPageUptodate(page); - page_cache_release(page); - } - } - if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", ext4_blocks_count(es)); -- cgit From 560671a0d3c9ad2d647fa6d09375a262e1f19c4f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 5 Jan 2009 22:20:24 -0500 Subject: ext4: Use high 16 bits of the block group descriptor's free counts fields Rename the lower bits with suffix _lo and add helper to access the values. Also rename bg_itable_unused_hi to bg_pad as in e2fsprogs. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 92034d2c8a7..2d24f423fd8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -866,8 +866,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ - gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); - gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); + ext4_free_blks_set(sb, gdp, input->free_blocks_count); + ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); -- cgit From 648f5879f5892dddd3ba71cd0d285599f40f2512 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 5 Jan 2009 21:46:04 -0500 Subject: ext4: mark the blocks/inode bitmap beyond end of group as used We need to mark the block/inode bitmap beyond the end of the group with '1'. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/resize.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 2d24f423fd8..c328be5d688 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -284,11 +284,9 @@ static int setup_new_group_blocks(struct super_block *sb, if ((err = extend_or_restart_transaction(handle, 2, bh))) goto exit_bh; - mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), - bh->b_data); + mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); - /* Mark unused entries in inode bitmap used */ ext4_debug("clear inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, input->inode_bitmap - start); @@ -297,7 +295,7 @@ static int setup_new_group_blocks(struct super_block *sb, goto exit_journal; } - mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); ext4_handle_dirty_metadata(handle, NULL, bh); exit_bh: -- cgit