summary refs log tree commit diff stats
diff options
context:
space:
mode:
author mohit84 <moagrawa@redhat.com> 2020-10-11 10:56:57 +0530
committer GitHub <noreply@github.com> 2020-10-11 10:56:57 +0530
commit ecdc77ceb9a5864be1fd0b3d7f919fa9ce60132e (patch)
tree 43604064dee2495ebbd32d777778508fa90eb07c
parent 8d54899724a31f29848e1461f68ce2cf40585056 (diff)
download glusterfs-ecdc77ceb9a5864be1fd0b3d7f919fa9ce60132e.tar.gz
glusterfs-ecdc77ceb9a5864be1fd0b3d7f919fa9ce60132e.tar.xz
glusterfs-ecdc77ceb9a5864be1fd0b3d7f919fa9ce60132e.zip
core: configure optimum inode table hash_size for shd (#1576)
In a brick_mux environment the shd process consumes a large amount of memory. After taking a statedump I found that it allocates about 1M per afr xlator for all bricks. With 4k volumes configured, the process consumes almost 6G of RSS in total, of which about 4G is consumed by inode tables:

[cluster/replicate.test1-replicate-0 - usage-type gf_common_mt_list_head memusage]
size=1273488
num_allocs=2
max_size=1273488
max_num_allocs=2
total_allocs=2

The inode_table_new function allocates this memory (about 1M) for the lists of inode and dentry hash buckets. For shd the lru_limit is 1, so there is no need to create a big hash table. To reduce the RSS of the shd process, pass an optimum bucket count at the time of creating the inode_table.

Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
Fixes: #1538
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
-rw-r--r-- api/src/glfs-master.c 2
-rw-r--r-- libglusterfs/src/glusterfs/inode.h 17
-rw-r--r-- libglusterfs/src/inode.c 51
-rw-r--r-- xlators/cluster/afr/src/afr.c 10
-rw-r--r-- xlators/cluster/dht/src/dht-rebalance.c 2
-rw-r--r-- xlators/cluster/ec/src/ec.c 2
-rw-r--r-- xlators/features/bit-rot/src/bitd/bit-rot.c 2
-rw-r--r-- xlators/features/quota/src/quotad-helpers.c 2
-rw-r--r-- xlators/features/trash/src/trash.c 2
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.c 6
-rw-r--r-- xlators/nfs/server/src/nfs.c 2
-rw-r--r-- xlators/protocol/server/src/server-handshake.c 2
12 files changed, 65 insertions, 35 deletions
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
index 100dcc16cc..331a60be95 100644
--- a/api/src/glfs-master.c
+++ b/api/src/glfs-master.c
@@ -39,7 +39,7 @@ graph_setup(struct glfs *fs, glusterfs_graph_t *graph)
}
if (!new_subvol->itable) {
- itable = inode_table_new(131072, new_subvol);
+ itable = inode_table_new(131072, new_subvol, 0, 0);
if (!itable) {
errno = ENOMEM;
ret = -1;
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
index 4b28da510c..c126822698 100644
--- a/libglusterfs/src/glusterfs/inode.h
+++ b/libglusterfs/src/glusterfs/inode.h
@@ -35,11 +35,12 @@ typedef struct _dentry dentry_t;
struct _inode_table {
pthread_mutex_t lock;
- size_t hashsize; /* bucket size of inode hash and dentry hash */
- char *name; /* name of the inode table, just for gf_log() */
- inode_t *root; /* root directory inode, with number 1 */
- xlator_t *xl; /* xlator to be called to do purge */
- uint32_t lru_limit; /* maximum LRU cache size */
+ size_t dentry_hashsize; /* Number of buckets for dentry hash*/
+ size_t inode_hashsize; /* Size of inode hash table */
+ char *name; /* name of the inode table, just for gf_log() */
+ inode_t *root; /* root directory inode, with number 1 */
+ xlator_t *xl; /* xlator to be called to do purge */
+ uint32_t lru_limit; /* maximum LRU cache size */
struct list_head *inode_hash; /* buckets for inode hash table */
struct list_head *name_hash; /* buckets for dentry hash table */
struct list_head active; /* list of inodes currently active (in an fop) */
@@ -120,12 +121,14 @@ struct _inode {
#define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)
inode_table_t *
-inode_table_new(uint32_t lru_limit, xlator_t *xl);
+inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dhash_size,
+ uint32_t inodehash_size);
inode_table_t *
inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
int32_t (*invalidator_fn)(xlator_t *, inode_t *),
- xlator_t *invalidator_xl);
+ xlator_t *invalidator_xl, uint32_t dentry_hashsize,
+ uint32_t inode_hashsize);
void
inode_table_destroy_all(glusterfs_ctx_t *ctx);
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index dbadf77442..6f81f45b0f 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -792,7 +792,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)
return NULL;
}
- int hash = hash_dentry(parent, name, table->hashsize);
+ int hash = hash_dentry(parent, name, table->dentry_hashsize);
pthread_mutex_lock(&table->lock);
{
@@ -868,7 +868,7 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
return ret;
}
- int hash = hash_dentry(parent, name, table->hashsize);
+ int hash = hash_dentry(parent, name, table->dentry_hashsize);
pthread_mutex_lock(&table->lock);
{
@@ -932,7 +932,7 @@ inode_find(inode_table_t *table, uuid_t gfid)
return NULL;
}
- int hash = hash_gfid(gfid, 65536);
+ int hash = hash_gfid(gfid, table->inode_hashsize);
pthread_mutex_lock(&table->lock);
{
@@ -994,7 +994,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
return NULL;
}
- int ihash = hash_gfid(iatt->ia_gfid, 65536);
+ int ihash = hash_gfid(iatt->ia_gfid, table->inode_hashsize);
old_inode = __inode_find(table, iatt->ia_gfid, ihash);
@@ -1074,7 +1074,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
table = inode->table;
if (parent && name) {
- hash = hash_dentry(parent, name, table->hashsize);
+ hash = hash_dentry(parent, name, table->dentry_hashsize);
}
if (name && strchr(name, '/')) {
@@ -1293,7 +1293,7 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
}
if (dstdir && dstname) {
- hash = hash_dentry(dstdir, dstname, table->hashsize);
+ hash = hash_dentry(dstdir, dstname, table->dentry_hashsize);
}
pthread_mutex_lock(&table->lock);
@@ -1658,7 +1658,8 @@ __inode_table_init_root(inode_table_t *table)
inode_table_t *
inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
int32_t (*invalidator_fn)(xlator_t *, inode_t *),
- xlator_t *invalidator_xl)
+ xlator_t *invalidator_xl, uint32_t dentry_hashsize,
+ uint32_t inode_hashsize)
{
inode_table_t *new = NULL;
uint32_t mem_pool_size = lru_limit;
@@ -1676,7 +1677,19 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
new->invalidator_fn = invalidator_fn;
new->invalidator_xl = invalidator_xl;
- new->hashsize = 14057; /* TODO: Random Number?? */
+ if (dentry_hashsize == 0) {
+ /* Prime number for uniform distribution */
+ new->dentry_hashsize = 14057;
+ } else {
+ new->dentry_hashsize = dentry_hashsize;
+ }
+
+ if (inode_hashsize == 0) {
+ /* The size of hash table always should be power of 2 */
+ new->inode_hashsize = 65536;
+ } else {
+ new->inode_hashsize = inode_hashsize;
+ }
/* In case FUSE is initing the inode table. */
if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
@@ -1690,12 +1703,14 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
if (!new->dentry_pool)
goto out;
- new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head),
+ new->inode_hash = (void *)GF_CALLOC(new->inode_hashsize,
+ sizeof(struct list_head),
gf_common_mt_list_head);
if (!new->inode_hash)
goto out;
- new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head),
+ new->name_hash = (void *)GF_CALLOC(new->dentry_hashsize,
+ sizeof(struct list_head),
gf_common_mt_list_head);
if (!new->name_hash)
goto out;
@@ -1707,11 +1722,11 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
if (!new->fd_mem_pool)
goto out;
- for (i = 0; i < 65536; i++) {
+ for (i = 0; i < new->inode_hashsize; i++) {
INIT_LIST_HEAD(&new->inode_hash[i]);
}
- for (i = 0; i < new->hashsize; i++) {
+ for (i = 0; i < new->dentry_hashsize; i++) {
INIT_LIST_HEAD(&new->name_hash[i]);
}
@@ -1751,10 +1766,12 @@ out:
}
inode_table_t *
-inode_table_new(uint32_t lru_limit, xlator_t *xl)
+inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dentry_hashsize,
+ uint32_t inode_hashsize)
{
/* Only fuse for now requires the inode table with invalidator */
- return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
+ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL,
+ dentry_hashsize, inode_hashsize);
}
int
@@ -2474,8 +2491,10 @@ inode_table_dump(inode_table_t *itable, char *prefix)
return;
}
- gf_proc_dump_build_key(key, prefix, "hashsize");
- gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize);
+ gf_proc_dump_build_key(key, prefix, "dentry_hashsize");
+ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->dentry_hashsize);
+ gf_proc_dump_build_key(key, prefix, "inode_hashsize");
+ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->inode_hashsize);
gf_proc_dump_build_key(key, prefix, "name");
gf_proc_dump_write(key, "%s", itable->name);
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index df7366f0a6..b60b3ed9b9 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -633,7 +633,15 @@ init(xlator_t *this)
goto out;
}
- this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
+ if (priv->shd.iamshd) {
+ /* Number of hash bucket should be prime number so declare 131
+ total dentry hash buckets
+ */
+ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 131, 128);
+ } else {
+ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 0, 0);
+ }
+
if (!this->itable) {
ret = -ENOMEM;
goto out;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 8ba8082bd8..1293fe1f86 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2405,7 +2405,7 @@ dht_build_root_inode(xlator_t *this, inode_t **inode)
inode_table_t *itable = NULL;
static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
- itable = inode_table_new(0, this);
+ itable = inode_table_new(0, this, 0, 0);
if (!itable)
return;
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 7344be4968..ee11535cfe 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -875,7 +875,7 @@ init(xlator_t *this)
if (ec_assign_read_mask(ec, read_mask_str))
goto failed;
- this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
+ this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this, 0, 0);
if (!this->itable)
goto failed;
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index a2f1c343a1..84b30911d0 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -1634,7 +1634,7 @@ notify(xlator_t *this, int32_t event, void *data, ...)
child->child_up = 1;
child->xl = subvol;
if (!child->table)
- child->table = inode_table_new(4096, subvol);
+ child->table = inode_table_new(4096, subvol, 0, 0);
_br_qchild_event(this, child, br_brick_connect);
pthread_cond_signal(&priv->cond);
diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
index 51ff1d7e98..bf996b3904 100644
--- a/xlators/features/quota/src/quotad-helpers.c
+++ b/xlators/features/quota/src/quotad-helpers.c
@@ -32,7 +32,7 @@ get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req)
UNLOCK(&priv->lock);
if (active_subvol->itable == NULL)
- active_subvol->itable = inode_table_new(4096, active_subvol);
+ active_subvol->itable = inode_table_new(4096, active_subvol, 0, 0);
state->itable = active_subvol->itable;
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index 7d09cba3e9..b8acc14871 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -2501,7 +2501,7 @@ init(xlator_t *this)
goto out;
}
- priv->trash_itable = inode_table_new(0, this);
+ priv->trash_itable = inode_table_new(0, this, 0, 0);
gf_log(this->name, GF_LOG_DEBUG, "brick path is%s", priv->brick_path);
this->private = (void *)priv;
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 0e22fe411e..042f1c6c4c 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -6358,10 +6358,10 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph)
}
#if FUSE_KERNEL_MINOR_VERSION >= 11
- itable = inode_table_with_invalidator(priv->lru_limit, graph->top,
- fuse_inode_invalidate_fn, this);
+ itable = inode_table_with_invalidator(
+ priv->lru_limit, graph->top, fuse_inode_invalidate_fn, this, 0, 0);
#else
- itable = inode_table_new(0, graph->top);
+ itable = inode_table_new(0, graph->top, 0, 0);
#endif
if (!itable) {
ret = -1;
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
index 39b73f88ac..577dd94487 100644
--- a/xlators/nfs/server/src/nfs.c
+++ b/xlators/nfs/server/src/nfs.c
@@ -565,7 +565,7 @@ nfs_init_subvolume(struct nfs_state *nfs, xlator_t *xl)
return -1;
lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT;
- xl->itable = inode_table_new(lrusize, xl);
+ xl->itable = inode_table_new(lrusize, xl, 0, 0);
if (!xl->itable) {
gf_msg(GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
"Failed to allocate inode table");
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index 85c87c1ab8..1aed4dd011 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -637,7 +637,7 @@ server_setvolume(rpcsvc_request_t *req)
/* TODO: what is this ? */
client->bound_xl->itable = inode_table_new(conf->inode_lru_limit,
- client->bound_xl);
+ client->bound_xl, 0, 0);
}
}
UNLOCK(&conf->itable_lock);