diff options
author | Pranith Kumar Karampuri <pranith.karampuri@phonepe.com> | 2021-03-22 10:19:27 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-22 10:19:27 +0530 |
commit | ec189a499d85c2aad1d54e55e47df6b95ba02922 (patch) | |
tree | 3211292a75638e8612486b6e981158a3a5bce0cf /xlators/cluster/dht/src/dht-rebalance.c | |
parent | 1da141ab38463c4a26474456eec81c5992367af9 (diff) | |
download | glusterfs-ec189a499d85c2aad1d54e55e47df6b95ba02922.tar.gz glusterfs-ec189a499d85c2aad1d54e55e47df6b95ba02922.tar.xz glusterfs-ec189a499d85c2aad1d54e55e47df6b95ba02922.zip |
cluster/dht: use readdir for fix-layout in rebalance (#2243)
Problem:
On a cluster with 15 million files, when fix-layout was started, it was
not progressing at all. So we tried to do an os.walk() + os.stat() on the
backend filesystem directly. It took 2.5 days. We removed os.stat() and
re-ran it on another brick with a similar data-set. It took 15 minutes. We
realized that readdirp is extremely costly compared to readdir if the
stat is not useful. The fix-layout operation only needs to know that an
entry is a directory so that fix-layout can be triggered on it. Most
modern filesystems provide this information in the readdir operation,
so we don't need readdirp, i.e. readdir+stat.
Fix:
Use the readdir operation in fix-layout. Do readdir+stat/lookup for
filesystems that don't provide d_type in the readdir operation.
fixes: #2241
Change-Id: I5fe2ecea25a399ad58e31a2e322caf69fc7f49eb
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-rebalance.c')
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 59 |
1 files changed, 24 insertions, 35 deletions
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index c3d60b7d2d..d15c36bf57 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -3608,6 +3608,9 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, struct iatt iatt = { 0, }; + struct iatt entry_iatt = { + 0, + }; inode_t *linked_inode = NULL, *inode = NULL; dht_conf_t *conf = NULL; int perrno = 0; @@ -3643,6 +3646,12 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } + linked_inode = inode_link(loc->inode, loc->parent, loc->name, &iatt); + + inode = loc->inode; + loc->inode = linked_inode; + inode_unref(inode); + fd = fd_create(loc->inode, defrag->pid); if (!fd) { gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); @@ -3675,8 +3684,8 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, fd_bind(fd); INIT_LIST_HEAD(&entries.list); - while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, NULL, - NULL)) != 0) { + while ((ret = syncop_readdir(this, fd, 131072, offset, &entries, NULL, + NULL)) != 0) { if (ret < 0) { if (-ret == ENOENT || -ret == ESTALE) { if (conf->decommission_subvols_cnt) { @@ -3711,9 +3720,11 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; - if (!IA_ISDIR(entry->d_stat.ia_type)) { + + if ((DT_DIR != entry->d_type) && (DT_UNKNOWN != entry->d_type)) { continue; } + loc_wipe(&entry_loc); ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name); @@ -3734,40 +3745,18 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, } } - if (gf_uuid_is_null(entry->d_stat.ia_gfid)) { - gf_log(this->name, GF_LOG_ERROR, - "%s/%s" - " gfid not present", - loc->path, entry->d_name); - continue; - } - - gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid); - - /*In case the gfid stored in the 
inode by inode_link - * and the gfid obtained in the lookup differs, then - * client3_3_lookup_cbk will return ESTALE and proper - * error will be captured - */ - - linked_inode = inode_link(entry_loc.inode, loc->inode, - entry->d_name, &entry->d_stat); - - inode = entry_loc.inode; - entry_loc.inode = linked_inode; - inode_unref(inode); - - if (gf_uuid_is_null(loc->gfid)) { - gf_log(this->name, GF_LOG_ERROR, - "%s/%s" - " gfid not present", - loc->path, entry->d_name); - defrag->total_failures++; - continue; + if (DT_UNKNOWN == entry->d_type) { + ret = syncop_lookup(this, &entry_loc, &entry_iatt, NULL, NULL, + NULL); + if ((ret == 0) && (entry_iatt.ia_type != IA_IFDIR)) { + continue; + } + /*If it is directory, gf_defrag_fix_layout() call will again do + * one more lookup. Not optimizing this part as all modern + * filesystems populate entry->d_type. We can optimize it when + * such a filesystem is found.*/ } - gf_uuid_copy(entry_loc.pargfid, loc->gfid); - /* A return value of 2 means, either process_dir or * lookup of a dir failed. Hence, don't commit hash * for the current directory*/ |