summaryrefslogtreecommitdiffstats
path: root/rpc/rpc-lib
diff options
context:
space:
mode:
authorNiels de Vos <ndevos@redhat.com>2015-02-17 12:12:11 +0100
committerKaleb KEITHLEY <kkeithle@redhat.com>2015-03-17 05:10:17 -0700
commit6b3704990257643da54100d8581856a7d2c72f86 (patch)
tree53256b40fc8f36aa0989649a69867acd2fa38faa /rpc/rpc-lib
parent2b97b57cd8c71cb07b7002cf3483e9cfc9403c58 (diff)
downloadglusterfs-6b3704990257643da54100d8581856a7d2c72f86.tar.gz
glusterfs-6b3704990257643da54100d8581856a7d2c72f86.tar.xz
glusterfs-6b3704990257643da54100d8581856a7d2c72f86.zip
socket: use TCP_USER_TIMEOUT to detect client failures quicker
Use the network.ping-timeout value to set the TCP_USER_TIMEOUT socket option (see 'man 7 tcp'). The network.ping-timeout option sets the transport.tcp-user-timeout option that is handled in the rpc/socket layer on the protocol/server side. This socket option makes detecting uncleanly disconnected clients more reliable. When the socket gets closed, any locks that the client held are released. This makes it possible to reduce the fail-over time for applications that run on systems that became unreachable due to a network partition or a general client-side system error (kernel panic, hang, ...). It is not trivial to create a test-case for this at the moment. We need a client that disconnects uncleanly and another client that tries to take over the lock from the disconnected client. URL: http://supercolony.gluster.org/pipermail/gluster-devel/2014-May/040755.html Change-Id: I5e5f540a49abfb5f398291f1818583a63a5f4bb4 BUG: 1129787 Signed-off-by: Niels de Vos <ndevos@redhat.com> Reviewed-on: http://review.gluster.org/8065 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: soumya k <skoduri@redhat.com> Reviewed-by: Santosh Pradhan <santosh.pradhan@gmail.com> Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
Diffstat (limited to 'rpc/rpc-lib')
-rw-r--r--rpc/rpc-lib/src/rpc-transport.c7
-rw-r--r--rpc/rpc-lib/src/rpc-transport.h2
2 files changed, 7 insertions, 2 deletions
diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
index f6774b7235..23fbf37360 100644
--- a/rpc/rpc-lib/src/rpc-transport.c
+++ b/rpc/rpc-lib/src/rpc-transport.c
@@ -572,7 +572,7 @@ out:
//why call it if you dont set it.
int
rpc_transport_keepalive_options_set (dict_t *options, int32_t interval,
- int32_t time)
+ int32_t time, int32_t timeout)
{
int ret = -1;
@@ -588,6 +588,11 @@ rpc_transport_keepalive_options_set (dict_t *options, int32_t interval,
"transport.socket.keepalive-time", time);
if (ret)
goto out;
+
+ ret = dict_set_int32 (options,
+ "transport.tcp-user-timeout", timeout);
+ if (ret)
+ goto out;
out:
return ret;
}
diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h
index d1c650e7ec..d0572a1633 100644
--- a/rpc/rpc-lib/src/rpc-transport.h
+++ b/rpc/rpc-lib/src/rpc-transport.h
@@ -306,7 +306,7 @@ rpc_transport_pollin_destroy (rpc_transport_pollin_t *pollin);
int
rpc_transport_keepalive_options_set (dict_t *options, int32_t interval,
- int32_t time);
+ int32_t time, int32_t timeout);
int
rpc_transport_unix_options_build (dict_t **options, char *filepath,