author     Xavi Hernandez <xhernandez@users.noreply.github.com>  2021-02-24 15:04:23 +0100
committer  GitHub <noreply@github.com>  2021-02-24 15:04:23 +0100
commit     70e6ee279e173651ac6c68c996914440ac4d8f0e (patch)
tree       bdfa39e7cd376747e7ec5403090176ae9d14da0e /xlators
parent     80b65bc2ff4709f5a5d3dccaea24ccf9a3401337 (diff)
cluster/dht: Fix stack overflow in readdir(p) (#2170)
When parallel-readdir is enabled, readdir(p) requests sent by DHT can be immediately processed and answered in the same thread, before the call to STACK_WIND_COOKIE() completes. This means that the readdir(p) cbk is processed synchronously. In some cases it may decide to send another readdir(p) request, which causes a recursive call. Under some special conditions, when the directories are big, the number of nested calls can grow so high that the process crashes because of a stack overflow.

This patch fixes the problem by not allowing nested readdir(p) calls. When a nested call is detected, it's queued instead of being sent. The queued request is processed when the current call finishes, by the top-level stack function.

Fixes: #2169
Change-Id: Id763a8a51fb3c3314588ec7c162f649babf33099
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
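To see the wind-or-queue pattern in isolation, here is a minimal, single-threaded model of the technique described above. This is a sketch, not the patch: it uses C11 atomics instead of liburcu's uatomic_* helpers, and wind_readdir(), readdir_cbk(), queued_offset and remaining are hypothetical stand-ins for STACK_WIND_COOKIE(), dht_readdir_cbk() and the per-call state kept in dht_local_t.

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint queue;        /* pending request count (models local->queue) */
static int queued_offset;        /* arguments of the queued call (models local->queue_offset) */
static int remaining = 100000;   /* simulated directory entries left to read */

static void wind_readdir(int offset);

/* Runs synchronously, as it can when parallel-readdir has the answer
 * cached. Re-entering wind_readdir() here only queues the next request,
 * because the counter is already nonzero. */
static void readdir_cbk(int offset)
{
    if (--remaining > 0)
        wind_readdir(offset + 1);
}

static void wind_readdir(int offset)
{
    queued_offset = offset;
    /* Only the caller that moves the counter 0 -> 1 actually winds;
     * everyone else has already stored their arguments and returns. */
    if (atomic_fetch_add(&queue, 1) == 0) {
        do {
            /* Stands in for STACK_WIND_COOKIE(); may invoke the cbk
             * synchronously before "returning". */
            readdir_cbk(queued_offset);
            /* A nested call during the wind leaves the counter above 1:
             * drain it from this top-level loop instead of recursing. */
        } while (atomic_fetch_sub(&queue, 1) != 1);
    }
}

int main(void)
{
    wind_readdir(0);
    printf("remaining = %d\n", remaining);
    return 0;
}

With the counter in place, the 100000 simulated requests complete at constant stack depth; the pre-patch behaviour corresponds to readdir_cbk() winding directly, nesting one frame per request.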
Diffstat (limited to 'xlators')
-rw-r--r--  xlators/cluster/dht/src/dht-common.c | 86
-rw-r--r--  xlators/cluster/dht/src/dht-common.h |  5
2 files changed, 81 insertions(+), 10 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index da3bbc4f14..b98d9a8918 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -23,6 +23,8 @@
#include <libgen.h>
#include <signal.h>
+#include <urcu/uatomic.h>
+
static int
dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, gf_dirent_t *entries,
@@ -6627,6 +6629,74 @@ out:
return;
}
+/* Execute a READDIR request if no other request is in progress. Otherwise
+ * queue it to be executed when the current one finishes.
+ *
+ * When parallel-readdir is enabled and directory contents are cached, the
+ * callback of a readdirp will be called before returning from STACK_WIND.
+ * If the returned contents are not useful for DHT, and the buffer is not
+ * yet full, a nested readdirp request will be sent. This means that there
+ * will be many recursive calls. In the worst case there might be a stack
+ * overflow.
+ *
+ * To avoid this, we only wind a request if no other request is being wound.
+ * If there's another request, we simply store the values for the next call.
+ * When the thread processing the current wind completes it, it will take
+ * the new arguments and send the request from the top level stack. */
+static void
+dht_queue_readdir(call_frame_t *frame, xlator_t *xl, off_t offset,
+ fop_readdir_cbk_t cbk)
+{
+ dht_local_t *local;
+
+ local = frame->local;
+
+ local->queue_xl = xl;
+ local->queue_offset = offset;
+
+ if (uatomic_add_return(&local->queue, 1) == 1) {
+ /* If we are here it means that we are the first one to send a
+ * readdir request. Any attempt to send more readdir requests will
+ * find local->queue > 1, so it won't do anything. The needed data
+ * to send the request has been stored into local->queue_*.
+ *
+ * Note: this works because we will only have 1 additional request
+ * at most (the one called by the cbk function) while we are
+ * processing another readdir. */
+ do {
+ STACK_WIND_COOKIE(frame, cbk, local->queue_xl, local->queue_xl,
+ local->queue_xl->fops->readdir, local->fd,
+ local->size, local->queue_offset, local->xattr);
+
+ /* If a new readdirp request has been added before returning
+ * from winding, we process it. */
+ } while (uatomic_sub_return(&local->queue, 1) != 0);
+ }
+}
+
+/* Execute a READDIRP request if no other request is in progress. Otherwise
+ * queue it to be executed when the current one finishes. */
+static void
+dht_queue_readdirp(call_frame_t *frame, xlator_t *xl, off_t offset,
+ fop_readdirp_cbk_t cbk)
+{
+ dht_local_t *local;
+
+ local = frame->local;
+
+ local->queue_xl = xl;
+ local->queue_offset = offset;
+
+ /* Check dht_queue_readdir() comments for an explanation of this. */
+ if (uatomic_add_return(&local->queue, 1) == 1) {
+ do {
+ STACK_WIND_COOKIE(frame, cbk, local->queue_xl, local->queue_xl,
+ local->queue_xl->fops->readdirp, local->fd,
+ local->size, local->queue_offset, local->xattr);
+ } while (uatomic_sub_return(&local->queue, 1) != 0);
+ }
+}
+
/* Posix returns op_errno = ENOENT to indicate that there are no more
* entries
*/
@@ -6894,9 +6964,8 @@ done:
}
}
- STACK_WIND_COOKIE(frame, dht_readdirp_cbk, next_subvol, next_subvol,
- next_subvol->fops->readdirp, local->fd, local->size,
- next_offset, local->xattr);
+ dht_queue_readdirp(frame, next_subvol, next_offset, dht_readdirp_cbk);
+
return 0;
}
@@ -7015,9 +7084,8 @@ done:
goto unwind;
}
- STACK_WIND_COOKIE(frame, dht_readdir_cbk, next_subvol, next_subvol,
- next_subvol->fops->readdir, local->fd, local->size,
- next_offset, NULL);
+ dht_queue_readdir(frame, next_subvol, next_offset, dht_readdir_cbk);
+
return 0;
}
@@ -7116,11 +7184,9 @@ dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
}
- STACK_WIND_COOKIE(frame, dht_readdirp_cbk, xvol, xvol,
- xvol->fops->readdirp, fd, size, yoff, local->xattr);
+ dht_queue_readdirp(frame, xvol, yoff, dht_readdirp_cbk);
} else {
- STACK_WIND_COOKIE(frame, dht_readdir_cbk, xvol, xvol,
- xvol->fops->readdir, fd, size, yoff, local->xattr);
+ dht_queue_readdir(frame, xvol, yoff, dht_readdir_cbk);
}
return 0;
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index ac21b237b9..9d5c7e15e5 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -333,6 +333,11 @@ struct dht_local {
dht_dir_transaction_t lock[2], *current;
+ /* for nested readdirs */
+ xlator_t *queue_xl;
+ off_t queue_offset;
+ uint32_t queue;
+
/* inodelks during filerename for backward compatibility */
dht_lock_t **rename_inodelk_backward_compatible;
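A note on the new fields: queue is a counter of outstanding requests, not a boolean flag, and the protocol hinges on liburcu's add/sub primitives returning the post-operation value, so the 0 -> 1 transition identifies the owner of the wind loop and a nonzero result after the decrement means another wind is queued. Below is a quick standalone check of those return-value semantics, assuming the liburcu headers are installed (urcu/uatomic.h should be header-only, so no extra linker flags, but that is worth verifying on your platform).

#include <stdint.h>
#include <stdio.h>
#include <urcu/uatomic.h>

int main(void)
{
    uint32_t queue = 0;   /* mirrors the new dht_local field */

    /* First request: 0 -> 1. The returned 1 marks this caller as the
     * owner of the wind loop. */
    printf("%u\n", uatomic_add_return(&queue, 1));  /* prints 1 */

    /* Nested request from the cbk: 1 -> 2, so it only queues. */
    printf("%u\n", uatomic_add_return(&queue, 1));  /* prints 2 */

    /* Owner finishes one wind: 2 -> 1, nonzero means resend. */
    printf("%u\n", uatomic_sub_return(&queue, 1));  /* prints 1 */

    /* Owner finishes the queued wind: 1 -> 0, the loop exits. */
    printf("%u\n", uatomic_sub_return(&queue, 1));  /* prints 0 */
    return 0;
}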