diff options
author | mohit84 <moagrawa@redhat.com> | 2020-10-11 10:56:57 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-11 10:56:57 +0530 |
commit | ecdc77ceb9a5864be1fd0b3d7f919fa9ce60132e (patch) | |
tree | 43604064dee2495ebbd32d777778508fa90eb07c | |
parent | 8d54899724a31f29848e1461f68ce2cf40585056 (diff) | |
download | glusterfs-ecdc77ceb9a5864be1fd0b3d7f919fa9ce60132e.tar.gz glusterfs-ecdc77ceb9a5864be1fd0b3d7f919fa9ce60132e.tar.xz glusterfs-ecdc77ceb9a5864be1fd0b3d7f919fa9ce60132e.zip |
core: configure optimum inode table hash_size for shd (#1576)
In a brick_mux environment the shd process consumes a lot of memory.
After printing the statedump I found that it allocates 1M per afr xlator
for all bricks. When 4k volumes are configured it consumes almost 6G of RSS
in total, of which 4G is consumed by inode_tables.
[cluster/replicate.test1-replicate-0 - usage-type gf_common_mt_list_head memusage]
size=1273488
num_allocs=2
max_size=1273488
max_num_allocs=2
total_allocs=2
The inode_table_new function allocates memory (1M) for the lists of inode and dentry hash buckets.
For shd the lru_limit is 1, so we don't need to create a big hash table. To reduce the
RSS size of the shd process, pass an optimum bucket count when creating the inode_table.
Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
Fixes: #1538
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
-rw-r--r-- | api/src/glfs-master.c | 2 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs/inode.h | 17 | ||||
-rw-r--r-- | libglusterfs/src/inode.c | 51 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 10 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 2 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec.c | 2 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 2 | ||||
-rw-r--r-- | xlators/features/quota/src/quotad-helpers.c | 2 | ||||
-rw-r--r-- | xlators/features/trash/src/trash.c | 2 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 6 | ||||
-rw-r--r-- | xlators/nfs/server/src/nfs.c | 2 | ||||
-rw-r--r-- | xlators/protocol/server/src/server-handshake.c | 2 |
12 files changed, 65 insertions, 35 deletions
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c index 100dcc16cc..331a60be95 100644 --- a/api/src/glfs-master.c +++ b/api/src/glfs-master.c @@ -39,7 +39,7 @@ graph_setup(struct glfs *fs, glusterfs_graph_t *graph) } if (!new_subvol->itable) { - itable = inode_table_new(131072, new_subvol); + itable = inode_table_new(131072, new_subvol, 0, 0); if (!itable) { errno = ENOMEM; ret = -1; diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h index 4b28da510c..c126822698 100644 --- a/libglusterfs/src/glusterfs/inode.h +++ b/libglusterfs/src/glusterfs/inode.h @@ -35,11 +35,12 @@ typedef struct _dentry dentry_t; struct _inode_table { pthread_mutex_t lock; - size_t hashsize; /* bucket size of inode hash and dentry hash */ - char *name; /* name of the inode table, just for gf_log() */ - inode_t *root; /* root directory inode, with number 1 */ - xlator_t *xl; /* xlator to be called to do purge */ - uint32_t lru_limit; /* maximum LRU cache size */ + size_t dentry_hashsize; /* Number of buckets for dentry hash*/ + size_t inode_hashsize; /* Size of inode hash table */ + char *name; /* name of the inode table, just for gf_log() */ + inode_t *root; /* root directory inode, with number 1 */ + xlator_t *xl; /* xlator to be called to do purge */ + uint32_t lru_limit; /* maximum LRU cache size */ struct list_head *inode_hash; /* buckets for inode hash table */ struct list_head *name_hash; /* buckets for dentry hash table */ struct list_head active; /* list of inodes currently active (in an fop) */ @@ -120,12 +121,14 @@ struct _inode { #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1) inode_table_t * -inode_table_new(uint32_t lru_limit, xlator_t *xl); +inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dhash_size, + uint32_t inodehash_size); inode_table_t * inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, int32_t (*invalidator_fn)(xlator_t *, inode_t *), - xlator_t *invalidator_xl); + xlator_t *invalidator_xl, uint32_t 
dentry_hashsize, + uint32_t inode_hashsize); void inode_table_destroy_all(glusterfs_ctx_t *ctx); diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index dbadf77442..6f81f45b0f 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -792,7 +792,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) return NULL; } - int hash = hash_dentry(parent, name, table->hashsize); + int hash = hash_dentry(parent, name, table->dentry_hashsize); pthread_mutex_lock(&table->lock); { @@ -868,7 +868,7 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name, return ret; } - int hash = hash_dentry(parent, name, table->hashsize); + int hash = hash_dentry(parent, name, table->dentry_hashsize); pthread_mutex_lock(&table->lock); { @@ -932,7 +932,7 @@ inode_find(inode_table_t *table, uuid_t gfid) return NULL; } - int hash = hash_gfid(gfid, 65536); + int hash = hash_gfid(gfid, table->inode_hashsize); pthread_mutex_lock(&table->lock); { @@ -994,7 +994,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, return NULL; } - int ihash = hash_gfid(iatt->ia_gfid, 65536); + int ihash = hash_gfid(iatt->ia_gfid, table->inode_hashsize); old_inode = __inode_find(table, iatt->ia_gfid, ihash); @@ -1074,7 +1074,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) table = inode->table; if (parent && name) { - hash = hash_dentry(parent, name, table->hashsize); + hash = hash_dentry(parent, name, table->dentry_hashsize); } if (name && strchr(name, '/')) { @@ -1293,7 +1293,7 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, } if (dstdir && dstname) { - hash = hash_dentry(dstdir, dstname, table->hashsize); + hash = hash_dentry(dstdir, dstname, table->dentry_hashsize); } pthread_mutex_lock(&table->lock); @@ -1658,7 +1658,8 @@ __inode_table_init_root(inode_table_t *table) inode_table_t * inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, int32_t 
(*invalidator_fn)(xlator_t *, inode_t *), - xlator_t *invalidator_xl) + xlator_t *invalidator_xl, uint32_t dentry_hashsize, + uint32_t inode_hashsize) { inode_table_t *new = NULL; uint32_t mem_pool_size = lru_limit; @@ -1676,7 +1677,19 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, new->invalidator_fn = invalidator_fn; new->invalidator_xl = invalidator_xl; - new->hashsize = 14057; /* TODO: Random Number?? */ + if (dentry_hashsize == 0) { + /* Prime number for uniform distribution */ + new->dentry_hashsize = 14057; + } else { + new->dentry_hashsize = dentry_hashsize; + } + + if (inode_hashsize == 0) { + /* The size of hash table always should be power of 2 */ + new->inode_hashsize = 65536; + } else { + new->inode_hashsize = inode_hashsize; + } /* In case FUSE is initing the inode table. */ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) @@ -1690,12 +1703,14 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, if (!new->dentry_pool) goto out; - new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head), + new->inode_hash = (void *)GF_CALLOC(new->inode_hashsize, + sizeof(struct list_head), gf_common_mt_list_head); if (!new->inode_hash) goto out; - new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head), + new->name_hash = (void *)GF_CALLOC(new->dentry_hashsize, + sizeof(struct list_head), gf_common_mt_list_head); if (!new->name_hash) goto out; @@ -1707,11 +1722,11 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, if (!new->fd_mem_pool) goto out; - for (i = 0; i < 65536; i++) { + for (i = 0; i < new->inode_hashsize; i++) { INIT_LIST_HEAD(&new->inode_hash[i]); } - for (i = 0; i < new->hashsize; i++) { + for (i = 0; i < new->dentry_hashsize; i++) { INIT_LIST_HEAD(&new->name_hash[i]); } @@ -1751,10 +1766,12 @@ out: } inode_table_t * -inode_table_new(uint32_t lru_limit, xlator_t *xl) +inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dentry_hashsize, + uint32_t 
inode_hashsize) { /* Only fuse for now requires the inode table with invalidator */ - return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); + return inode_table_with_invalidator(lru_limit, xl, NULL, NULL, + dentry_hashsize, inode_hashsize); } int @@ -2474,8 +2491,10 @@ inode_table_dump(inode_table_t *itable, char *prefix) return; } - gf_proc_dump_build_key(key, prefix, "hashsize"); - gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize); + gf_proc_dump_build_key(key, prefix, "dentry_hashsize"); + gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->dentry_hashsize); + gf_proc_dump_build_key(key, prefix, "inode_hashsize"); + gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->inode_hashsize); gf_proc_dump_build_key(key, prefix, "name"); gf_proc_dump_write(key, "%s", itable->name); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index df7366f0a6..b60b3ed9b9 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -633,7 +633,15 @@ init(xlator_t *this) goto out; } - this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this); + if (priv->shd.iamshd) { + /* Number of hash bucket should be prime number so declare 131 + total dentry hash buckets + */ + this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 131, 128); + } else { + this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 0, 0); + } + if (!this->itable) { ret = -ENOMEM; goto out; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 8ba8082bd8..1293fe1f86 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -2405,7 +2405,7 @@ dht_build_root_inode(xlator_t *this, inode_t **inode) inode_table_t *itable = NULL; static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; - itable = inode_table_new(0, this); + itable = inode_table_new(0, this, 0, 0); if (!itable) return; diff --git a/xlators/cluster/ec/src/ec.c 
b/xlators/cluster/ec/src/ec.c index 7344be4968..ee11535cfe 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -875,7 +875,7 @@ init(xlator_t *this) if (ec_assign_read_mask(ec, read_mask_str)) goto failed; - this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this); + this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this, 0, 0); if (!this->itable) goto failed; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index a2f1c343a1..84b30911d0 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -1634,7 +1634,7 @@ notify(xlator_t *this, int32_t event, void *data, ...) child->child_up = 1; child->xl = subvol; if (!child->table) - child->table = inode_table_new(4096, subvol); + child->table = inode_table_new(4096, subvol, 0, 0); _br_qchild_event(this, child, br_brick_connect); pthread_cond_signal(&priv->cond); diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c index 51ff1d7e98..bf996b3904 100644 --- a/xlators/features/quota/src/quotad-helpers.c +++ b/xlators/features/quota/src/quotad-helpers.c @@ -32,7 +32,7 @@ get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req) UNLOCK(&priv->lock); if (active_subvol->itable == NULL) - active_subvol->itable = inode_table_new(4096, active_subvol); + active_subvol->itable = inode_table_new(4096, active_subvol, 0, 0); state->itable = active_subvol->itable; diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c index 7d09cba3e9..b8acc14871 100644 --- a/xlators/features/trash/src/trash.c +++ b/xlators/features/trash/src/trash.c @@ -2501,7 +2501,7 @@ init(xlator_t *this) goto out; } - priv->trash_itable = inode_table_new(0, this); + priv->trash_itable = inode_table_new(0, this, 0, 0); gf_log(this->name, GF_LOG_DEBUG, "brick path is%s", priv->brick_path); this->private = (void *)priv; diff --git 
a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 0e22fe411e..042f1c6c4c 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -6358,10 +6358,10 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph) } #if FUSE_KERNEL_MINOR_VERSION >= 11 - itable = inode_table_with_invalidator(priv->lru_limit, graph->top, - fuse_inode_invalidate_fn, this); + itable = inode_table_with_invalidator( + priv->lru_limit, graph->top, fuse_inode_invalidate_fn, this, 0, 0); #else - itable = inode_table_new(0, graph->top); + itable = inode_table_new(0, graph->top, 0, 0); #endif if (!itable) { ret = -1; diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c index 39b73f88ac..577dd94487 100644 --- a/xlators/nfs/server/src/nfs.c +++ b/xlators/nfs/server/src/nfs.c @@ -565,7 +565,7 @@ nfs_init_subvolume(struct nfs_state *nfs, xlator_t *xl) return -1; lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT; - xl->itable = inode_table_new(lrusize, xl); + xl->itable = inode_table_new(lrusize, xl, 0, 0); if (!xl->itable) { gf_msg(GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY, "Failed to allocate inode table"); diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index 85c87c1ab8..1aed4dd011 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -637,7 +637,7 @@ server_setvolume(rpcsvc_request_t *req) /* TODO: what is this ? */ client->bound_xl->itable = inode_table_new(conf->inode_lru_limit, - client->bound_xl); + client->bound_xl, 0, 0); } } UNLOCK(&conf->itable_lock); |