<div dir="ltr">FreeBSD also seems to have these socket options since this commit:<div><br></div><div><a href="http://svnweb.freebsd.org/base?view=revision&revision=232945">http://svnweb.freebsd.org/base?view=revision&revision=232945</a><br>
</div><div><br></div><div style>At a quick glance, this went in for FreeBSD 9.1.</div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Feb 11, 2013 at 3:09 PM, Martin Blix Grydeland <span dir="ltr"><<a href="mailto:martin@varnish-software.com" target="_blank">martin@varnish-software.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">This will help in determining remote hang up of the connection for<br>
situations where we still are not able to send any reply, but freeing<br>
the session will reduce resource overhead (e.g. when staying on<br>
waitinglists for extended periods).<br>
<br>
On platforms that support it also add runtime parameters to control<br>
the keep-alive packet settings through socket options. On platforms<br>
that don't support these socket options, the values must be set system<br>
wide.<br>
---<br>
bin/varnishd/cache/cache_acceptor.c | 116 +++++++++++++++++++++++++++++<br>
bin/varnishd/common/params.h | 5 ++<br>
bin/varnishd/mgt/mgt_param_tbl.c | 20 +++++<br>
<a href="http://configure.ac" target="_blank">configure.ac</a> | 32 ++++++++<br>
doc/sphinx/installation/platformnotes.rst | 15 ++++<br>
5 files changed, 188 insertions(+)<br>
<br>
diff --git a/bin/varnishd/cache/cache_acceptor.c b/bin/varnishd/cache/cache_acceptor.c<br>
index 62209a5..4e17dfb 100644<br>
--- a/bin/varnishd/cache/cache_acceptor.c<br>
+++ b/bin/varnishd/cache/cache_acceptor.c<br>
@@ -70,8 +70,23 @@ static const struct linger linger = {<br>
.l_onoff = 0,<br>
};<br>
<br>
+/*<br>
+ * We turn on keepalives by default to assist in detecting clients that have<br>
+ * hung up on connections returning from waitinglists<br>
+ */<br>
+static const int keepalive = 1;<br>
+<br>
static unsigned char need_sndtimeo, need_rcvtimeo, need_linger, need_test,<br>
need_tcpnodelay;<br>
+static unsigned char need_keepalive = 0;<br>
+#ifdef TCP_KEEP_WORKS<br>
+static unsigned char need_ka_time = 0;<br>
+static unsigned char need_ka_probes = 0;<br>
+static unsigned char need_ka_intvl = 0;<br>
+static int ka_time = 0;<br>
+static int ka_probes = 0;<br>
+static int ka_intvl = 0;<br>
+#endif<br>
<br>
/*--------------------------------------------------------------------<br>
* Some kernels have bugs/limitations with respect to which options are<br>
@@ -83,6 +98,10 @@ static void<br>
sock_test(int fd)<br>
{<br>
struct linger lin;<br>
+ int tka;<br>
+#ifdef TCP_KEEP_WORKS<br>
+ int tka_time, tka_probes, tka_intvl;<br>
+#endif<br>
struct timeval tv;<br>
socklen_t l;<br>
int i, tcp_nodelay;<br>
@@ -97,6 +116,48 @@ sock_test(int fd)<br>
if (memcmp(&lin, &linger, l))<br>
need_linger = 1;<br>
<br>
+ l = sizeof tka;<br>
+ i = getsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &tka, &l);<br>
+ if (i) {<br>
+ VTCP_Assert(i);<br>
+ return;<br>
+ }<br>
+ assert(l == sizeof tka);<br>
+ if (tka != keepalive)<br>
+ need_keepalive = 1;<br>
+<br>
+#ifdef TCP_KEEP_WORKS<br>
+ l = sizeof tka_time;<br>
+ i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &tka_time, &l);<br>
+ if (i) {<br>
+ VTCP_Assert(i);<br>
+ return;<br>
+ }<br>
+ assert(l == sizeof tka_time);<br>
+ if (tka_time != ka_time)<br>
+ need_ka_time = 1;<br>
+<br>
+ l = sizeof tka_probes;<br>
+ i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &tka_probes, &l);<br>
+ if (i) {<br>
+ VTCP_Assert(i);<br>
+ return;<br>
+ }<br>
+ assert(l == sizeof tka_probes);<br>
+ if (tka_probes != ka_probes)<br>
+ need_ka_probes = 1;<br>
+<br>
+ l = sizeof tka_intvl;<br>
+ i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &tka_intvl, &l);<br>
+ if (i) {<br>
+ VTCP_Assert(i);<br>
+ return;<br>
+ }<br>
+ assert(l == sizeof tka_intvl);<br>
+ if (tka_intvl != ka_intvl)<br>
+ need_ka_intvl = 1;<br>
+#endif<br>
+<br>
#ifdef SO_SNDTIMEO_WORKS<br>
l = sizeof tv;<br>
i = getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &l);<br>
@@ -281,6 +342,22 @@ VCA_SetupSess(struct worker *wrk, struct sess *sp)<br>
if (need_linger)<br>
VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_LINGER,<br>
&linger, sizeof linger));<br>
+ if (need_keepalive)<br>
+ VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_KEEPALIVE,<br>
+ &keepalive, sizeof keepalive));<br>
+#ifdef TCP_KEEP_WORKS<br>
+ AN(ka_time);<br>
+ if (need_ka_time)<br>
+ VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPIDLE,<br>
+ &ka_time, sizeof ka_time));<br>
+ if (need_ka_probes)<br>
+ VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPCNT,<br>
+ &ka_probes, sizeof ka_probes));<br>
+ if (need_ka_intvl)<br>
+ VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPINTVL,<br>
+ &ka_intvl, sizeof ka_intvl));<br>
+#endif<br>
+<br>
#ifdef SO_SNDTIMEO_WORKS<br>
if (need_sndtimeo)<br>
VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_SNDTIMEO,<br>
@@ -316,6 +393,12 @@ vca_acct(void *arg)<br>
THR_SetName("cache-acceptor");<br>
(void)arg;<br>
<br>
+#ifdef TCP_KEEP_WORKS<br>
+ ka_time = cache_param->tcp_keepalive_time;<br>
+ ka_probes = cache_param->tcp_keepalive_probes;<br>
+ ka_intvl = cache_param->tcp_keepalive_intvl;<br>
+#endif<br>
+<br>
VTAILQ_FOREACH(ls, &heritage.socks, list) {<br>
if (ls->sock < 0)<br>
continue;<br>
@@ -324,6 +407,16 @@ vca_acct(void *arg)<br>
&linger, sizeof linger));<br>
AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_NODELAY,<br>
&tcp_nodelay, sizeof tcp_nodelay));<br>
+ AZ(setsockopt(ls->sock, SOL_SOCKET, SO_KEEPALIVE,<br>
+ &keepalive, sizeof keepalive));<br>
+#ifdef TCP_KEEP_WORKS<br>
+ AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPIDLE,<br>
+ &ka_time, sizeof ka_time));<br>
+ AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPCNT,<br>
+ &ka_probes, sizeof ka_probes));<br>
+ AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPINTVL,<br>
+ &ka_intvl, sizeof ka_intvl));<br>
+#endif<br>
if (cache_param->accept_filter) {<br>
i = VTCP_filter_http(ls->sock);<br>
if (i)<br>
@@ -339,6 +432,29 @@ vca_acct(void *arg)<br>
t0 = VTIM_real();<br>
while (1) {<br>
(void)sleep(1);<br>
+#ifdef TCP_KEEP_WORKS<br>
+ if (cache_param->tcp_keepalive_time != ka_time ||<br>
+ cache_param->tcp_keepalive_probes != ka_probes ||<br>
+ cache_param->tcp_keepalive_intvl != ka_intvl) {<br>
+ need_test = 1;<br>
+ ka_time = cache_param->tcp_keepalive_time;<br>
+ ka_probes = cache_param->tcp_keepalive_probes;<br>
+ ka_intvl = cache_param->tcp_keepalive_intvl;<br>
+ VTAILQ_FOREACH(ls, &heritage.socks, list) {<br>
+ if (ls->sock < 0)<br>
+ continue;<br>
+ AZ(setsockopt(ls->sock, IPPROTO_TCP,<br>
+ TCP_KEEPIDLE,<br>
+ &ka_time, sizeof ka_time));<br>
+ AZ(setsockopt(ls->sock, IPPROTO_TCP,<br>
+ TCP_KEEPCNT,<br>
+ &ka_probes, sizeof ka_probes));<br>
+ AZ(setsockopt(ls->sock, IPPROTO_TCP,<br>
+ TCP_KEEPINTVL,<br>
+ &ka_intvl, sizeof ka_intvl));<br>
+ }<br>
+ }<br>
+#endif<br>
#ifdef SO_SNDTIMEO_WORKS<br>
if (cache_param->idle_send_timeout != send_timeout) {<br>
need_test = 1;<br>
diff --git a/bin/varnishd/common/params.h b/bin/varnishd/common/params.h<br>
index a6e881b..6893461 100644<br>
--- a/bin/varnishd/common/params.h<br>
+++ b/bin/varnishd/common/params.h<br>
@@ -110,6 +110,11 @@ struct params {<br>
unsigned pipe_timeout;<br>
unsigned send_timeout;<br>
unsigned idle_send_timeout;<br>
+#ifdef TCP_KEEP_WORKS<br>
+ unsigned tcp_keepalive_time;<br>
+ unsigned tcp_keepalive_probes;<br>
+ unsigned tcp_keepalive_intvl;<br>
+#endif<br>
<br>
/* Management hints */<br>
unsigned auto_restart;<br>
diff --git a/bin/varnishd/mgt/mgt_param_tbl.c b/bin/varnishd/mgt/mgt_param_tbl.c<br>
index 8601bae..0380a02 100644<br>
--- a/bin/varnishd/mgt/mgt_param_tbl.c<br>
+++ b/bin/varnishd/mgt/mgt_param_tbl.c<br>
@@ -205,6 +205,26 @@ const struct parspec mgt_parspec[] = {<br>
"See setsockopt(2) under SO_SNDTIMEO for more information.",<br>
DELAYED_EFFECT,<br>
"60", "seconds" },<br>
+#ifdef TCP_KEEP_WORKS<br>
+ { "tcp_keepalive_time", tweak_timeout, &mgt_param.tcp_keepalive_time,<br>
+ 1, 7200,<br>
+ "The number of seconds a connection needs to be idle before "<br>
+ "TCP begins sending out keep-alive probes.",<br>
+ 0,<br>
+ "600", "seconds" },<br>
+ { "tcp_keepalive_probes", tweak_uint, &mgt_param.tcp_keepalive_probes,<br>
+ 1, 100,<br>
+ "The maximum number of TCP keep-alive probes to send before "<br>
+ "giving up and killing the connection if no response is "<br>
+ "obtained from the other end.",<br>
+ 0,<br>
+ "5", "probes" },<br>
+ { "tcp_keepalive_intvl", tweak_timeout, &mgt_param.tcp_keepalive_intvl,<br>
+ 1, 100,<br>
+ "The number of seconds between TCP keep-alive probes.",<br>
+ 0,<br>
+ "5", "seconds" },<br>
+#endif<br>
{ "auto_restart", tweak_bool, &mgt_param.auto_restart, 0, 0,<br>
"Restart child process automatically if it dies.\n",<br>
0,<br>
diff --git a/<a href="http://configure.ac" target="_blank">configure.ac</a> b/<a href="http://configure.ac" target="_blank">configure.ac</a><br>
index a4cd8e8..6613980 100644<br>
--- a/<a href="http://configure.ac" target="_blank">configure.ac</a><br>
+++ b/<a href="http://configure.ac" target="_blank">configure.ac</a><br>
@@ -423,6 +423,38 @@ if test "$ac_cv_so_rcvtimeo_works" = no ||<br>
fi<br>
LIBS="${save_LIBS}"<br>
<br>
+# Check if the OS supports TCP_KEEP(CNT|IDLE|INTVL) socket options<br>
+save_LIBS="${LIBS}"<br>
+LIBS="${LIBS} ${NET_LIBS}"<br>
+AC_CACHE_CHECK([for TCP_KEEP(CNT|IDLE|INTVL) socket options],<br>
+ [ac_cv_tcp_keep_works],<br>
+ [AC_RUN_IFELSE(<br>
+ [AC_LANG_PROGRAM([[<br>
+#include <stdio.h><br>
+#include <sys/types.h><br>
+#include <sys/socket.h><br>
+#include <netinet/in.h><br>
+#include <netinet/tcp.h><br>
+ ]],[[<br>
+int s = socket(AF_INET, SOCK_STREAM, 0);<br>
+int i;<br>
+i = 5;<br>
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &i, sizeof i))<br>
+ return (1);<br>
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &i, sizeof i))<br>
+ return (1);<br>
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &i, sizeof i))<br>
+ return (1);<br>
+return (0);<br>
+ ]])],<br>
+ [ac_cv_tcp_keep_works=yes],<br>
+ [ac_cv_tcp_keep_works=no])<br>
+ ])<br>
+if test "$ac_cv_tcp_keep_works" = yes; then<br>
+ AC_DEFINE([TCP_KEEP_WORKS], [1], [Define if TCP_KEEP* works])<br>
+fi<br>
+LIBS="${save_LIBS}"<br>
+<br>
# Run-time directory<br>
VARNISH_STATE_DIR='${localstatedir}/varnish'<br>
AC_SUBST(VARNISH_STATE_DIR)<br>
diff --git a/doc/sphinx/installation/platformnotes.rst b/doc/sphinx/installation/platformnotes.rst<br>
index 3ad486c..e1720b6 100644<br>
--- a/doc/sphinx/installation/platformnotes.rst<br>
+++ b/doc/sphinx/installation/platformnotes.rst<br>
@@ -35,3 +35,18 @@ Reduce the maximum stack size by running::<br>
<br>
in the Varnish startup script.<br>
<br>
+TCP keep-alive configuration<br>
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~<br>
+<br>
+On platforms other than Linux, Varnish is not able to set the TCP<br>
+keep-alive values per socket, and therefore the tcp_keepalive_* Varnish<br>
+runtime parameters are not available. On these platforms it can be<br>
+beneficial to tune the system-wide values for these in order to more<br>
+reliably detect remote close for sessions spending a long time on<br>
+waitinglists. This will help free up resources faster.<br>
+<br>
+On Linux the defaults are set to:<br>
+<br>
+ tcp_keepalive_time = 600 seconds<br>
+ tcp_keepalive_probes = 5<br>
+ tcp_keepalive_intvl = 5 seconds<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.7.10.4<br>
<br>
<br>
_______________________________________________<br>
varnish-dev mailing list<br>
<a href="mailto:varnish-dev@varnish-cache.org">varnish-dev@varnish-cache.org</a><br>
<a href="https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev" target="_blank">https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev</a><br>
</font></span></blockquote></div><br></div>