From 6b82021b9e91cd689fdffadbcdb9a42597bbe764 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Mon, 5 Apr 2010 18:17:14 -0700 Subject: ocfs2: increase the default size of local alloc windows I have observed that the current size of 8M gives us pretty poor fragmentation on multi-threaded workloads which do lots of writes. Generally, I can increase the size of local alloc windows and observe a marked decrease in fragmentation, even up and beyond window sizes of 512 megabytes. This makes sense for a couple reasons - larger local alloc means more room for reservation windows. On multi-node workloads the larger local alloc helps as well because we don't have to do window slides as often. Also, I removed the OCFS2_DEFAULT_LOCAL_ALLOC_SIZE constant as it is no longer used and the comment above it was out of date. To test fragmentation, I used a workload which launched 4 threads that did 4k writes into a series of about 140 alternating files. With resv_level=2, and a 4k/4k file system I observed the following average fragmentation for various localalloc= parameters: localalloc= avg. fragmentation 8 48 32 16 64 10 120 7 On larger cluster sizes, the difference is more dramatic. The new default size top out at 256M, which we'll only get for cluster sizes of 32K and above. Signed-off-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/ocfs2/ocfs2_fs.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index bb37218a797..d61a1521b10 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -282,14 +282,6 @@ /* Journal limits (in bytes) */ #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) -/* - * Default local alloc size (in megabytes) - * - * The value chosen should be such that most allocations, including new - * block groups, use local alloc. - */ -#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 - /* * Inline extended attribute size (in bytes) * The value chosen should be aligned to 16 byte boundaries. -- cgit From 4cbe4249d6586d5d88ef271e07302407a14c8443 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 13 Apr 2010 14:26:12 +0800 Subject: ocfs2: Define data structures for discontiguous block groups. Defines the OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG feature bit and modifies struct ocfs2_group_desc for the feature. Signed-off-by: Joel Becker Signed-off-by: Tao Ma --- fs/ocfs2/ocfs2_fs.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index d61a1521b10..448aa8d11a9 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -165,6 +165,9 @@ /* Refcount tree support */ #define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000 +/* Discontigous block groups */ +#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 + /* * backup superblock flag is used to indicate that this volume * has backup superblocks. @@ -831,6 +834,13 @@ struct ocfs2_dx_leaf { struct ocfs2_dx_entry_list dl_list; }; +/* + * Largest bitmap for a block (suballocator) group in bytes. This limit + * does not affect cluster groups (global allocator). Cluster group + * bitmaps run to the end of the block. + */ +#define OCFS2_MAX_BG_BITMAP_SIZE 256 + /* * On disk allocator group structure for OCFS2 */ @@ -852,7 +862,29 @@ struct ocfs2_group_desc __le64 bg_blkno; /* Offset on disk, in blocks */ /*30*/ struct ocfs2_block_check bg_check; /* Error checking */ __le64 bg_reserved2; -/*40*/ __u8 bg_bitmap[0]; +/*40*/ union { + __u8 bg_bitmap[0]; + struct { + /* + * Block groups may be discontiguous when + * OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG is set. + * The extents of a discontigous block group are + * stored in bg_list. It is a flat list. + * l_tree_depth must always be zero. A + * discontiguous group is signified by a non-zero + * bg_list->l_next_free_rec. Only block groups + * can be discontiguous; Cluster groups cannot. + * We've never made a block group with more than + * 2048 blocks (256 bytes of bg_bitmap). This + * codifies that limit so that we can fit bg_list. + * bg_size of a discontiguous block group will + * be 256 to match bg_bitmap_filler. + */ + __u8 bg_bitmap_filler[OCFS2_MAX_BG_BITMAP_SIZE]; +/*140*/ struct ocfs2_extent_list bg_list; + }; + }; +/* Actual on-disk size is one block */ }; struct ocfs2_refcount_rec { @@ -1276,12 +1308,16 @@ static inline u16 ocfs2_local_alloc_size(struct super_block *sb) return size; } -static inline int ocfs2_group_bitmap_size(struct super_block *sb) +static inline int ocfs2_group_bitmap_size(struct super_block *sb, + int suballocator) { int size; - size = sb->s_blocksize - - offsetof(struct ocfs2_group_desc, bg_bitmap); + if (suballocator) + size = OCFS2_MAX_BG_BITMAP_SIZE; + else + size = sb->s_blocksize - + offsetof(struct ocfs2_group_desc, bg_bitmap); return size; } @@ -1404,12 +1440,15 @@ static inline int ocfs2_local_alloc_size(int blocksize) return size; } -static inline int ocfs2_group_bitmap_size(int blocksize) +static inline int ocfs2_group_bitmap_size(int blocksize, int suballocator) { int size; - size = blocksize - - offsetof(struct ocfs2_group_desc, bg_bitmap); + if (suballocator) + size = OCFS2_MAX_BG_BITMAP_SIZE; + else + size = blocksize - + offsetof(struct ocfs2_group_desc, bg_bitmap); return size; } -- cgit From 798db35f4649eac2778381c390ed7d12de9ec767 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 13 Apr 2010 14:26:32 +0800 Subject: ocfs2: Allocate discontiguous block groups. If we cannot get a contiguous region for a block group, allocate a discontiguous one when the filesystem supports it. Signed-off-by: Joel Becker Signed-off-by: Tao Ma --- fs/ocfs2/ocfs2_fs.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 448aa8d11a9..888ba4ec42c 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -1278,6 +1278,16 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb) return size / sizeof(struct ocfs2_extent_rec); } +static inline u16 ocfs2_extent_recs_per_gd(struct super_block *sb) +{ + int size; + + size = sb->s_blocksize - + offsetof(struct ocfs2_group_desc, bg_list.l_recs); + + return size / sizeof(struct ocfs2_extent_rec); +} + static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb) { int size; @@ -1430,6 +1440,16 @@ static inline int ocfs2_extent_recs_per_eb(int blocksize) return size / sizeof(struct ocfs2_extent_rec); } +static inline int ocfs2_extent_recs_per_gd(int blocksize) +{ + int size; + + size = blocksize - + offsetof(struct ocfs2_group_desc, bg_list.l_recs); + + return size / sizeof(struct ocfs2_extent_rec); +} + static inline int ocfs2_local_alloc_size(int blocksize) { int size; -- cgit From 9cbc01231e82f9390edaea2b766abcb7165dc4b2 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 26 Mar 2010 10:07:42 +0800 Subject: ocfs2: Add suballoc_loc to metadata blocks. We need a suballoc_loc field on any suballocated block. Define them. Signed-off-by: Joel Becker --- fs/ocfs2/ocfs2_fs.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 888ba4ec42c..a17bce591ee 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -507,7 +507,10 @@ struct ocfs2_extent_block block group */ __le32 h_fs_generation; /* Must match super block */ __le64 h_blkno; /* Offset on disk, in blocks */ -/*20*/ __le64 h_reserved3; +/*20*/ __le64 h_suballoc_loc; /* Suballocator block group this + eb belongs to. Only valid + if allocated from a + discontiguous block group */ __le64 h_next_leaf_blk; /* Offset on disk, in blocks, of next leaf header pointing to data */ @@ -674,7 +677,11 @@ struct ocfs2_dinode { /*80*/ struct ocfs2_block_check i_check; /* Error checking */ /*88*/ __le64 i_dx_root; /* Pointer to dir index root block */ /*90*/ __le64 i_refcount_loc; - __le64 i_reserved2[4]; + __le64 i_suballoc_loc; /* Suballocator block group this + inode belongs to. Only valid + if allocated from a + discontiguous block group */ +/*A0*/ __le64 i_reserved2[3]; /*B8*/ union { __le64 i_pad1; /* Generic way to refer to this 64bit union */ @@ -809,7 +816,12 @@ struct ocfs2_dx_root_block { __le32 dr_reserved2; __le64 dr_free_blk; /* Pointer to head of free * unindexed block list. */ - __le64 dr_reserved3[15]; + __le64 dr_suballoc_loc; /* Suballocator block group + this root belongs to. + Only valid if allocated + from a discontiguous + block group */ + __le64 dr_reserved3[14]; union { struct ocfs2_extent_list dr_list; /* Keep this aligned to 128 * bits for maximum space @@ -929,7 +941,11 @@ struct ocfs2_refcount_block { /*40*/ __le32 rf_generation; /* generation number. all be the same * for the same refcount tree. */ __le32 rf_reserved0; - __le64 rf_reserved1[7]; + __le64 rf_suballoc_loc; /* Suballocator block group this + refcount block belongs to. Only + valid if allocated from a + discontiguous block group */ +/*50*/ __le64 rf_reserved1[6]; /*80*/ union { struct ocfs2_refcount_list rf_records; /* List of refcount records */ @@ -1041,7 +1057,10 @@ struct ocfs2_xattr_block { real xattr or a xattr tree. */ __le16 xb_reserved0; __le32 xb_reserved1; - __le64 xb_reserved2; + __le64 xb_suballoc_loc; /* Suballocator block group this + xattr block belongs to. Only + valid if allocated from a + discontiguous block group */ /*30*/ union { struct ocfs2_xattr_header xb_header; /* xattr header if this block contains xattr */ -- cgit From 8571882c21e5073b2f96147ec4ff9b7042339e1b Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Tue, 13 Apr 2010 14:38:06 +0800 Subject: ocfs2: ocfs2_group_bitmap_size has to handle old volume. ocfs2_group_bitmap_size has to handle the case when the volume don't have discontiguous block group support. So pass the feature_incompat in and check it. Signed-off-by: Tao Ma --- fs/ocfs2/ocfs2_fs.h | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index a17bce591ee..67bb8a77e86 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -1338,15 +1338,21 @@ static inline u16 ocfs2_local_alloc_size(struct super_block *sb) } static inline int ocfs2_group_bitmap_size(struct super_block *sb, - int suballocator) + int suballocator, + u32 feature_incompat) { - int size; + int size = sb->s_blocksize - + offsetof(struct ocfs2_group_desc, bg_bitmap); - if (suballocator) + /* + * The cluster allocator uses the entire block. Suballocators have + * never used more than OCFS2_MAX_BG_BITMAP_SIZE. Unfortunately, older + * code expects bg_size set to the maximum. Thus we must keep + * bg_size as-is unless discontig_bg is enabled. + */ + if (suballocator && + (feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)) size = OCFS2_MAX_BG_BITMAP_SIZE; - else - size = sb->s_blocksize - - offsetof(struct ocfs2_group_desc, bg_bitmap); return size; } @@ -1479,15 +1485,22 @@ static inline int ocfs2_local_alloc_size(int blocksize) return size; } -static inline int ocfs2_group_bitmap_size(int blocksize, int suballocator) +static inline int ocfs2_group_bitmap_size(int blocksize, + int suballocator, + uint32_t feature_incompat) { - int size; + int size = sb->s_blocksize - + offsetof(struct ocfs2_group_desc, bg_bitmap); - if (suballocator) + /* + * The cluster allocator uses the entire block. Suballocators have + * never used more than OCFS2_MAX_BG_BITMAP_SIZE. Unfortunately, older + * code expects bg_size set to the maximum. Thus we must keep + * bg_size as-is unless discontig_bg is enabled. + */ + if (suballocator && + (feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)) size = OCFS2_MAX_BG_BITMAP_SIZE; - else - size = blocksize - - offsetof(struct ocfs2_group_desc, bg_bitmap); return size; } -- cgit From af2bf0d86019e0b0306965321096f8380b7ca830 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 17 May 2010 15:14:17 +0800 Subject: ocfs2: Add ocfs2_gd_is_discontig. Add ocfs2_gd_is_discontig so that we can test whether a group descriptor is discontiguous or not. Signed-off-by: Tao Ma --- fs/ocfs2/ocfs2_fs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 67bb8a77e86..b01d0dddfcc 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -1574,5 +1574,19 @@ static inline void ocfs2_set_de_type(struct ocfs2_dir_entry *de, de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; } +static inline int ocfs2_gd_is_discontig(struct ocfs2_group_desc *gd) +{ + if ((offsetof(struct ocfs2_group_desc, bg_bitmap) + + le16_to_cpu(gd->bg_size)) != + offsetof(struct ocfs2_group_desc, bg_list)) + return 0; + /* + * Only valid to check l_next_free_rec if + * bg_bitmap + bg_size == bg_list. + */ + if (!gd->bg_list.l_next_free_rec) + return 0; + return 1; +} #endif /* _OCFS2_FS_H */ -- cgit From 1a934c3e57594588c373aea858e4593cdfcba4f4 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 18 Mar 2010 15:54:22 +0800 Subject: ocfs2: enable discontig block group support. Signed-off-by: Tao Ma --- fs/ocfs2/ocfs2_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index b01d0dddfcc..33f1c9a8258 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -100,7 +100,8 @@ | OCFS2_FEATURE_INCOMPAT_XATTR \ | OCFS2_FEATURE_INCOMPAT_META_ECC \ | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ - | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) + | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ + | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) -- cgit