From c48cbccfafbcf71aaad4ed7d868dbac609bc34fe Mon Sep 17 00:00:00 2001 From: Shyam Date: Mon, 23 Feb 2015 10:00:39 -0500 Subject: epoll: Fix broken RPC throttling due to MT epoll The RPC throttle which kicks in by setting the poll-in event on a socket to false, is broken with the MT epoll commit. This is due to the event handler of poll-in attempting to read as much out of the socket till it receives an EAGAIN. Which may never happen and hence we would be processing far more RPCs than we want to. This is being fixed by changing the epoll from ET to LT, and reading request by request, so that we honor the throttle. The downside is that we do not drain the socket, but go back to epoll_wait before reading the next request, but when kicking in throttle, we need to anyway and so a busy connection would degrade to LT anyway to maintain the throttle. As a result this change should not cause deviation in the performance much for busy connections. Change-Id: I522d284d2d0f40e1812ab4c1a453c8aec666464c BUG: 1192114 Signed-off-by: Shyam Reviewed-on: http://review.gluster.org/9726 Tested-by: Gluster Build System Reviewed-by: Krishnan Parthasarathi Reviewed-by: Raghavendra G Tested-by: Raghavendra G --- libglusterfs/src/event-epoll.c | 4 ++-- rpc/rpc-transport/socket/src/socket.c | 14 +++----------- tests/basic/ec/nfs.t | 23 +++++++++++++++++++++++ 3 files changed, 28 insertions(+), 13 deletions(-) create mode 100755 tests/basic/ec/nfs.t diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c index a8e9a08b93..62085dbef1 100644 --- a/libglusterfs/src/event-epoll.c +++ b/libglusterfs/src/event-epoll.c @@ -335,7 +335,7 @@ event_register_epoll (struct event_pool *event_pool, int fd, LOCK (&slot->lock); { - /* make epoll edge triggered and 'singleshot', which + /* make epoll 'singleshot', which means we need to re-add the fd with epoll_ctl(EPOLL_CTL_MOD) after delivery of every single event. 
This assures us that while a poller @@ -344,7 +344,7 @@ event_register_epoll (struct event_pool *event_pool, int fd, time as well. */ - slot->events = EPOLLPRI | EPOLLET | EPOLLONESHOT; + slot->events = EPOLLPRI | EPOLLONESHOT; slot->handler = handler; slot->data = data; diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c index a7e2bb0cfd..2b61eb417d 100644 --- a/rpc/rpc-transport/socket/src/socket.c +++ b/rpc/rpc-transport/socket/src/socket.c @@ -2263,16 +2263,9 @@ socket_event_poll_in (rpc_transport_t *this) rpc_transport_pollin_t *pollin = NULL; socket_private_t *priv = this->private; - do { - /* consume all we can, this is our only chance - (Edge Triggered polling in epoll) - */ - pollin = NULL; - ret = socket_proto_state_machine (this, &pollin); - - if (!pollin) - break; + ret = socket_proto_state_machine (this, &pollin); + if (pollin) { priv->ot_state = OT_CALLBACK; ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_RECEIVED, pollin); @@ -2280,8 +2273,7 @@ socket_event_poll_in (rpc_transport_t *this) priv->ot_state = OT_RUNNING; } rpc_transport_pollin_destroy (pollin); - - } while (pollin); + } return ret; } diff --git a/tests/basic/ec/nfs.t b/tests/basic/ec/nfs.t new file mode 100755 index 0000000000..b826296d58 --- /dev/null +++ b/tests/basic/ec/nfs.t @@ -0,0 +1,23 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../nfs.rc + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5} +EXPECT "Created" volinfo_field $V0 'Status' +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status' + +EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +TEST mount_nfs $H0:/$V0 $N0 nolock + +TEST dd if=/dev/zero of=$N0/test bs=1024k count=1k + +## Before killing daemon to avoid deadlocks +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 + +cleanup -- cgit