[PATCH 1/2] Turn on SO_KEEPALIVE on all TCP connections.

Martin Blix Grydeland martin at varnish-software.com
Thu Feb 14 22:45:48 CET 2013


This will help in determining remote hang up of the connection for
situations where we still are not able to send any reply, but freeing
the session will reduce resource overhead (e.g. when staying on
waitinglists for extended periods).

On platforms that support it also add runtime parameters to control
the keep-alive packet settings through socket options. On platforms
that don't support these socket options, the values must be set system
wide.

The Varnish runtime parameters will only be applied when they are less
than the system default.
---
 bin/varnishd/cache/cache_acceptor.c       |  160 +++++++++++++++++++++++++++++
 bin/varnishd/common/params.h              |    5 +
 bin/varnishd/mgt/mgt_param_tbl.c          |   25 +++++
 configure.ac                              |   32 ++++++
 doc/sphinx/installation/platformnotes.rst |   25 +++++
 5 files changed, 247 insertions(+)

diff --git a/bin/varnishd/cache/cache_acceptor.c b/bin/varnishd/cache/cache_acceptor.c
index 62209a5..ee6b179 100644
--- a/bin/varnishd/cache/cache_acceptor.c
+++ b/bin/varnishd/cache/cache_acceptor.c
@@ -70,8 +70,26 @@ static const struct linger linger = {
 	.l_onoff	=	0,
 };
 
+/*
+ * We turn on keepalives by default to assist in detecting clients that have
+ * hung up on connections returning from waitinglists
+ */
+static const int keepalive = 1;
+
 static unsigned char	need_sndtimeo, need_rcvtimeo, need_linger, need_test,
 			need_tcpnodelay;
+static unsigned char	need_keepalive = 0;
+#ifdef HAVE_TCP_KEEP
+static unsigned char	need_ka_time = 0;
+static unsigned char	need_ka_probes = 0;
+static unsigned char	need_ka_intvl = 0;
+static int		ka_time_cur = 0;
+static int		ka_probes_cur = 0;
+static int		ka_intvl_cur = 0;
+static int		ka_time, ka_time_sys;
+static int		ka_probes, ka_probes_sys;
+static int		ka_intvl, ka_intvl_sys;
+#endif
 
 /*--------------------------------------------------------------------
  * Some kernels have bugs/limitations with respect to which options are
@@ -83,6 +101,10 @@ static void
 sock_test(int fd)
 {
 	struct linger lin;
+	int tka;
+#ifdef HAVE_TCP_KEEP
+	int tka_time, tka_probes, tka_intvl;
+#endif
 	struct timeval tv;
 	socklen_t l;
 	int i, tcp_nodelay;
@@ -97,6 +119,48 @@ sock_test(int fd)
 	if (memcmp(&lin, &linger, l))
 		need_linger = 1;
 
+	l = sizeof tka;
+	i = getsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &tka, &l);
+	if (i) {
+		VTCP_Assert(i);
+		return;
+	}
+	assert(l == sizeof tka);
+	if (tka != keepalive)
+		need_keepalive = 1;
+
+#ifdef HAVE_TCP_KEEP
+	l = sizeof tka_time;
+	i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &tka_time, &l);
+	if (i) {
+		VTCP_Assert(i);
+		return;
+	}
+	assert(l == sizeof tka_time);
+	if (tka_time != ka_time_cur)
+		need_ka_time = 1;
+
+	l = sizeof tka_probes;
+	i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &tka_probes, &l);
+	if (i) {
+		VTCP_Assert(i);
+		return;
+	}
+	assert(l == sizeof tka_probes);
+	if (tka_probes != ka_probes_cur)
+		need_ka_probes = 1;
+
+	l = sizeof tka_intvl;
+	i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &tka_intvl, &l);
+	if (i) {
+		VTCP_Assert(i);
+		return;
+	}
+	assert(l == sizeof tka_intvl);
+	if (tka_intvl != ka_intvl_cur)
+		need_ka_intvl = 1;
+#endif
+
 #ifdef SO_SNDTIMEO_WORKS
 	l = sizeof tv;
 	i = getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &l);
@@ -281,6 +345,22 @@ VCA_SetupSess(struct worker *wrk, struct sess *sp)
 	if (need_linger)
 		VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_LINGER,
 		    &linger, sizeof linger));
+	if (need_keepalive)
+		VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_KEEPALIVE,
+		    &keepalive, sizeof keepalive));
+#ifdef HAVE_TCP_KEEP
+	AN(ka_time);
+	if (need_ka_time)
+		VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPIDLE,
+			&ka_time_cur, sizeof ka_time_cur));
+	if (need_ka_probes)
+		VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPCNT,
+			&ka_probes_cur, sizeof ka_probes_cur));
+	if (need_ka_intvl)
+		VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPINTVL,
+			&ka_intvl_cur, sizeof ka_intvl_cur));
+#endif
+
 #ifdef SO_SNDTIMEO_WORKS
 	if (need_sndtimeo)
 		VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_SNDTIMEO,
@@ -312,10 +392,17 @@ vca_acct(void *arg)
 	struct listen_sock *ls;
 	double t0, now;
 	int i;
+	socklen_t len;
 
 	THR_SetName("cache-acceptor");
 	(void)arg;
 
+#ifdef HAVE_TCP_KEEP
+	ka_time = cache_param->tcp_keepalive_time;
+	ka_probes = cache_param->tcp_keepalive_probes;
+	ka_intvl = cache_param->tcp_keepalive_intvl;
+#endif
+
 	VTAILQ_FOREACH(ls, &heritage.socks, list) {
 		if (ls->sock < 0)
 			continue;
@@ -324,6 +411,50 @@ vca_acct(void *arg)
 		    &linger, sizeof linger));
 		AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_NODELAY,
 		    &tcp_nodelay, sizeof tcp_nodelay));
+		AZ(setsockopt(ls->sock, SOL_SOCKET, SO_KEEPALIVE,
+		    &keepalive, sizeof keepalive));
+#ifdef HAVE_TCP_KEEP
+		if (!ka_time_cur) {
+			len = sizeof ka_time_sys;
+			AZ(getsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPIDLE,
+				&ka_time_sys, &len));
+			assert(len == sizeof ka_time_sys);
+			AN(ka_time_sys);
+			ka_time_cur = ka_time =
+			    (ka_time_sys < cache_param->tcp_keepalive_time ?
+				ka_time_sys : cache_param->tcp_keepalive_time);
+		}
+		AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPIDLE,
+		    &ka_time_cur, sizeof ka_time_cur));
+
+		if (!ka_probes_cur) {
+			len = sizeof ka_probes_sys;
+			AZ(getsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPCNT,
+			    &ka_probes_sys, &len));
+			assert(len == sizeof ka_probes_sys);
+			AN(ka_probes_sys);
+			ka_probes_cur = ka_probes =
+			    (ka_probes_sys < cache_param->tcp_keepalive_probes ?
+				ka_probes_sys :
+				cache_param->tcp_keepalive_probes);
+		}
+		AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPCNT,
+		    &ka_probes_cur, sizeof ka_probes_cur));
+
+		if (!ka_intvl_cur) {
+			len = sizeof ka_intvl_sys;
+			AZ(getsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPINTVL,
+			    &ka_intvl_sys, &len));
+			assert(len == sizeof ka_intvl_sys);
+			AN(ka_intvl_sys);
+			ka_intvl_cur = ka_intvl =
+			    (ka_intvl_sys < cache_param->tcp_keepalive_intvl ?
+				ka_intvl_sys :
+				cache_param->tcp_keepalive_intvl);
+		}
+		AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPINTVL,
+		    &ka_intvl_cur, sizeof ka_intvl_cur));
+#endif
 		if (cache_param->accept_filter) {
 			i = VTCP_filter_http(ls->sock);
 			if (i)
@@ -339,6 +470,35 @@ vca_acct(void *arg)
 	t0 = VTIM_real();
 	while (1) {
 		(void)sleep(1);
+#ifdef HAVE_TCP_KEEP
+		ka_time = (ka_time_sys < cache_param->tcp_keepalive_time ?
+		    ka_time_sys : cache_param->tcp_keepalive_time);
+		ka_probes = (ka_probes_sys < cache_param->tcp_keepalive_probes ?
+		    ka_probes_sys : cache_param->tcp_keepalive_probes);
+		ka_intvl = (ka_intvl_sys < cache_param->tcp_keepalive_intvl ?
+		    ka_intvl_sys : cache_param->tcp_keepalive_intvl);
+		if (ka_time_cur != ka_time ||
+		    ka_probes_cur != ka_probes ||
+		    ka_intvl_cur != ka_intvl) {
+			need_test = 1;
+			ka_time_cur = ka_time;
+			ka_probes_cur = ka_probes;
+			ka_intvl_cur = ka_intvl;
+			VTAILQ_FOREACH(ls, &heritage.socks, list) {
+				if (ls->sock < 0)
+					continue;
+				AZ(setsockopt(ls->sock, IPPROTO_TCP,
+				    TCP_KEEPIDLE,
+				    &ka_time_cur, sizeof ka_time_cur));
+				AZ(setsockopt(ls->sock, IPPROTO_TCP,
+				    TCP_KEEPCNT,
+				    &ka_probes_cur, sizeof ka_probes_cur));
+				AZ(setsockopt(ls->sock, IPPROTO_TCP,
+				    TCP_KEEPINTVL,
+				    &ka_intvl_cur, sizeof ka_intvl_cur));
+			}
+		}
+#endif
 #ifdef SO_SNDTIMEO_WORKS
 		if (cache_param->idle_send_timeout != send_timeout) {
 			need_test = 1;
diff --git a/bin/varnishd/common/params.h b/bin/varnishd/common/params.h
index a6e881b..ebeff0f 100644
--- a/bin/varnishd/common/params.h
+++ b/bin/varnishd/common/params.h
@@ -110,6 +110,11 @@ struct params {
 	unsigned		pipe_timeout;
 	unsigned		send_timeout;
 	unsigned		idle_send_timeout;
+#ifdef HAVE_TCP_KEEP
+	unsigned		tcp_keepalive_time;
+	unsigned		tcp_keepalive_probes;
+	unsigned		tcp_keepalive_intvl;
+#endif
 
 	/* Management hints */
 	unsigned		auto_restart;
diff --git a/bin/varnishd/mgt/mgt_param_tbl.c b/bin/varnishd/mgt/mgt_param_tbl.c
index 8601bae..b92c71b 100644
--- a/bin/varnishd/mgt/mgt_param_tbl.c
+++ b/bin/varnishd/mgt/mgt_param_tbl.c
@@ -205,6 +205,31 @@ const struct parspec mgt_parspec[] = {
 		"See setsockopt(2) under SO_SNDTIMEO for more information.",
 		DELAYED_EFFECT,
 		"60", "seconds" },
+#ifdef HAVE_TCP_KEEP
+	{ "tcp_keepalive_time", tweak_timeout, &mgt_param.tcp_keepalive_time,
+		1, 7200,
+		"The number of seconds a connection needs to be idle before "
+		"TCP begins sending out keep-alive probes. Note that this "
+		"setting will only take effect when it is less than the "
+		"system default.",
+		EXPERIMENTAL,
+		"600", "seconds" },
+	{ "tcp_keepalive_probes", tweak_uint, &mgt_param.tcp_keepalive_probes,
+		1, 100,
+		"The maximum number of TCP keep-alive probes to send before "
+		"giving up and killing the connection if no response is "
+		"obtained from the other end. Note that this setting will "
+		"only take effect when it is less than the system default.",
+		EXPERIMENTAL,
+		"5", "probes" },
+	{ "tcp_keepalive_intvl", tweak_timeout, &mgt_param.tcp_keepalive_intvl,
+		1, 100,
+		"The number of seconds between TCP keep-alive probes. Note "
+		"that this setting will only take effect when it is less "
+		"than the system default.",
+		EXPERIMENTAL,
+		"5", "seconds" },
+#endif
 	{ "auto_restart", tweak_bool, &mgt_param.auto_restart, 0, 0,
 		"Restart child process automatically if it dies.\n",
 		0,
diff --git a/configure.ac b/configure.ac
index a4cd8e8..76406d0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -423,6 +423,38 @@ if test "$ac_cv_so_rcvtimeo_works" = no ||
 fi
 LIBS="${save_LIBS}"
 
+# Check if the OS supports TCP_KEEP(CNT|IDLE|INTVL) socket options
+save_LIBS="${LIBS}"
+LIBS="${LIBS} ${NET_LIBS}"
+AC_CACHE_CHECK([for TCP_KEEP(CNT|IDLE|INTVL) socket options],
+  [ac_cv_have_tcp_keep],
+  [AC_RUN_IFELSE(
+    [AC_LANG_PROGRAM([[
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+    ]],[[
+int s = socket(AF_INET, SOCK_STREAM, 0);
+int i;
+i = 5;
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &i, sizeof i))
+  return (1);
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &i, sizeof i))
+  return (1);
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &i, sizeof i))
+  return (1);
+return (0);
+    ]])],
+    [ac_cv_have_tcp_keep=yes],
+    [ac_cv_have_tcp_keep=no])
+  ])
+if test "$ac_cv_have_tcp_keep" = yes; then
+   AC_DEFINE([HAVE_TCP_KEEP], [1], [Define if OS supports TCP_KEEP* socket options])
+fi
+LIBS="${save_LIBS}"
+
 # Run-time directory
 VARNISH_STATE_DIR='${localstatedir}/varnish'
 AC_SUBST(VARNISH_STATE_DIR)
diff --git a/doc/sphinx/installation/platformnotes.rst b/doc/sphinx/installation/platformnotes.rst
index 3ad486c..048442c 100644
--- a/doc/sphinx/installation/platformnotes.rst
+++ b/doc/sphinx/installation/platformnotes.rst
@@ -35,3 +35,28 @@ Reduce the maximum stack size by running::
 
 in the Varnish startup script.
 
+TCP keep-alive configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+On some systems, Varnish is not able to set the TCP keep-alive values
+per socket, and therefore the tcp_keepalive_* Varnish runtime
+parameters are not available. On these platforms it can be beneficial
+to tune the system-wide values for these in order to more reliably
+detect remote close for sessions spending a long time on
+waitinglists. This will help free up resources faster.
+
+Systems that do not support TCP keep-alive values per socket include:
+
+- Solaris releases prior to version 11
+- FreeBSD releases prior to version 9.1
+- OS X releases prior to Mountain Lion
+
+On platforms with the necessary socket options the defaults are set
+to:
+
+- tcp_keepalive_time = 600 seconds
+- tcp_keepalive_probes = 5
+- tcp_keepalive_intvl = 5 seconds
+
+Note that Varnish will only apply these run-time parameters so long as
+they are less than the system default value.
-- 
1.7.10.4




More information about the varnish-dev mailing list