[PATCH 1/2] Turn on SO_KEEPALIVE on all TCP connections.
Andrea Campi
andrea.campi at zephirworks.com
Mon Feb 11 15:20:57 CET 2013
FreeBSD also seems to have had these socket options since this commit:
http://svnweb.freebsd.org/base?view=revision&revision=232945
At a quick glance, this went in for FreeBSD 9.1.
On Mon, Feb 11, 2013 at 3:09 PM, Martin Blix Grydeland <
martin at varnish-software.com> wrote:
> This will help detect a remote hang-up of the connection in
> situations where we are not yet able to send any reply, but where freeing
> the session would reduce resource overhead (e.g. when staying on
> waitinglists for extended periods).
>
> On platforms that support it also add runtime parameters to control
> the keep-alive packet settings through socket options. On platforms
> that don't support these socket options, the values must be set system
> wide.
> ---
> bin/varnishd/cache/cache_acceptor.c | 116
> +++++++++++++++++++++++++++++
> bin/varnishd/common/params.h | 5 ++
> bin/varnishd/mgt/mgt_param_tbl.c | 20 +++++
> configure.ac | 32 ++++++++
> doc/sphinx/installation/platformnotes.rst | 15 ++++
> 5 files changed, 188 insertions(+)
>
> diff --git a/bin/varnishd/cache/cache_acceptor.c
> b/bin/varnishd/cache/cache_acceptor.c
> index 62209a5..4e17dfb 100644
> --- a/bin/varnishd/cache/cache_acceptor.c
> +++ b/bin/varnishd/cache/cache_acceptor.c
> @@ -70,8 +70,23 @@ static const struct linger linger = {
> .l_onoff = 0,
> };
>
> +/*
> + * We turn on keepalives by default to assist in detecting clients that
> have
> + * hung up on connections returning from waitinglists
> + */
> +static const int keepalive = 1;
> +
> static unsigned char need_sndtimeo, need_rcvtimeo, need_linger,
> need_test,
> need_tcpnodelay;
> +static unsigned char need_keepalive = 0;
> +#ifdef TCP_KEEP_WORKS
> +static unsigned char need_ka_time = 0;
> +static unsigned char need_ka_probes = 0;
> +static unsigned char need_ka_intvl = 0;
> +static int ka_time = 0;
> +static int ka_probes = 0;
> +static int ka_intvl = 0;
> +#endif
>
> /*--------------------------------------------------------------------
> * Some kernels have bugs/limitations with respect to which options are
> @@ -83,6 +98,10 @@ static void
> sock_test(int fd)
> {
> struct linger lin;
> + int tka;
> +#ifdef TCP_KEEP_WORKS
> + int tka_time, tka_probes, tka_intvl;
> +#endif
> struct timeval tv;
> socklen_t l;
> int i, tcp_nodelay;
> @@ -97,6 +116,48 @@ sock_test(int fd)
> if (memcmp(&lin, &linger, l))
> need_linger = 1;
>
> + l = sizeof tka;
> + i = getsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &tka, &l);
> + if (i) {
> + VTCP_Assert(i);
> + return;
> + }
> + assert(l == sizeof tka);
> + if (tka != keepalive)
> + need_keepalive = 1;
> +
> +#ifdef TCP_KEEP_WORKS
> + l = sizeof tka_time;
> + i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &tka_time, &l);
> + if (i) {
> + VTCP_Assert(i);
> + return;
> + }
> + assert(l == sizeof tka_time);
> + if (tka_time != ka_time)
> + need_ka_time = 1;
> +
> + l = sizeof tka_probes;
> + i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &tka_probes, &l);
> + if (i) {
> + VTCP_Assert(i);
> + return;
> + }
> + assert(l == sizeof tka_probes);
> + if (tka_probes != ka_probes)
> + need_ka_probes = 1;
> +
> + l = sizeof tka_intvl;
> + i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &tka_intvl, &l);
> + if (i) {
> + VTCP_Assert(i);
> + return;
> + }
> + assert(l == sizeof tka_intvl);
> + if (tka_intvl != ka_intvl)
> + need_ka_intvl = 1;
> +#endif
> +
> #ifdef SO_SNDTIMEO_WORKS
> l = sizeof tv;
> i = getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &l);
> @@ -281,6 +342,22 @@ VCA_SetupSess(struct worker *wrk, struct sess *sp)
> if (need_linger)
> VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_LINGER,
> &linger, sizeof linger));
> + if (need_keepalive)
> + VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_KEEPALIVE,
> + &keepalive, sizeof keepalive));
> +#ifdef TCP_KEEP_WORKS
> + AN(ka_time);
> + if (need_ka_time)
> + VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPIDLE,
> + &ka_time, sizeof ka_time));
> + if (need_ka_probes)
> + VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPCNT,
> + &ka_probes, sizeof ka_probes));
> + if (need_ka_intvl)
> + VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPINTVL,
> + &ka_intvl, sizeof ka_intvl));
> +#endif
> +
> #ifdef SO_SNDTIMEO_WORKS
> if (need_sndtimeo)
> VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_SNDTIMEO,
> @@ -316,6 +393,12 @@ vca_acct(void *arg)
> THR_SetName("cache-acceptor");
> (void)arg;
>
> +#ifdef TCP_KEEP_WORKS
> + ka_time = cache_param->tcp_keepalive_time;
> + ka_probes = cache_param->tcp_keepalive_probes;
> + ka_intvl = cache_param->tcp_keepalive_intvl;
> +#endif
> +
> VTAILQ_FOREACH(ls, &heritage.socks, list) {
> if (ls->sock < 0)
> continue;
> @@ -324,6 +407,16 @@ vca_acct(void *arg)
> &linger, sizeof linger));
> AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_NODELAY,
> &tcp_nodelay, sizeof tcp_nodelay));
> + AZ(setsockopt(ls->sock, SOL_SOCKET, SO_KEEPALIVE,
> + &keepalive, sizeof keepalive));
> +#ifdef TCP_KEEP_WORKS
> + AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPIDLE,
> + &ka_time, sizeof ka_time));
> + AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPCNT,
> + &ka_probes, sizeof ka_probes));
> + AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPINTVL,
> + &ka_intvl, sizeof ka_intvl));
> +#endif
> if (cache_param->accept_filter) {
> i = VTCP_filter_http(ls->sock);
> if (i)
> @@ -339,6 +432,29 @@ vca_acct(void *arg)
> t0 = VTIM_real();
> while (1) {
> (void)sleep(1);
> +#ifdef TCP_KEEP_WORKS
> + if (cache_param->tcp_keepalive_time != ka_time ||
> + cache_param->tcp_keepalive_probes != ka_probes ||
> + cache_param->tcp_keepalive_intvl != ka_intvl) {
> + need_test = 1;
> + ka_time = cache_param->tcp_keepalive_time;
> + ka_probes = cache_param->tcp_keepalive_probes;
> + ka_intvl = cache_param->tcp_keepalive_intvl;
> + VTAILQ_FOREACH(ls, &heritage.socks, list) {
> + if (ls->sock < 0)
> + continue;
> + AZ(setsockopt(ls->sock, IPPROTO_TCP,
> + TCP_KEEPIDLE,
> + &ka_time, sizeof ka_time));
> + AZ(setsockopt(ls->sock, IPPROTO_TCP,
> + TCP_KEEPCNT,
> + &ka_probes, sizeof ka_probes));
> + AZ(setsockopt(ls->sock, IPPROTO_TCP,
> + TCP_KEEPINTVL,
> + &ka_intvl, sizeof ka_intvl));
> + }
> + }
> +#endif
> #ifdef SO_SNDTIMEO_WORKS
> if (cache_param->idle_send_timeout != send_timeout) {
> need_test = 1;
> diff --git a/bin/varnishd/common/params.h b/bin/varnishd/common/params.h
> index a6e881b..6893461 100644
> --- a/bin/varnishd/common/params.h
> +++ b/bin/varnishd/common/params.h
> @@ -110,6 +110,11 @@ struct params {
> unsigned pipe_timeout;
> unsigned send_timeout;
> unsigned idle_send_timeout;
> +#ifdef TCP_KEEP_WORKS
> + unsigned tcp_keepalive_time;
> + unsigned tcp_keepalive_probes;
> + unsigned tcp_keepalive_intvl;
> +#endif
>
> /* Management hints */
> unsigned auto_restart;
> diff --git a/bin/varnishd/mgt/mgt_param_tbl.c
> b/bin/varnishd/mgt/mgt_param_tbl.c
> index 8601bae..0380a02 100644
> --- a/bin/varnishd/mgt/mgt_param_tbl.c
> +++ b/bin/varnishd/mgt/mgt_param_tbl.c
> @@ -205,6 +205,26 @@ const struct parspec mgt_parspec[] = {
> "See setsockopt(2) under SO_SNDTIMEO for more
> information.",
> DELAYED_EFFECT,
> "60", "seconds" },
> +#ifdef TCP_KEEP_WORKS
> + { "tcp_keepalive_time", tweak_timeout,
> &mgt_param.tcp_keepalive_time,
> + 1, 7200,
> + "The number of seconds a connection needs to be idle
> before "
> + "TCP begins sending out keep-alive probes.",
> + 0,
> + "600", "seconds" },
> + { "tcp_keepalive_probes", tweak_uint,
> &mgt_param.tcp_keepalive_probes,
> + 1, 100,
> + "The maximum number of TCP keep-alive probes to send
> before "
> + "giving up and killing the connection if no response is "
> + "obtained from the other end.",
> + 0,
> + "5", "probes" },
> + { "tcp_keepalive_intvl", tweak_timeout,
> &mgt_param.tcp_keepalive_intvl,
> + 1, 100,
> + "The number of seconds between TCP keep-alive probes.",
> + 0,
> + "5", "seconds" },
> +#endif
> { "auto_restart", tweak_bool, &mgt_param.auto_restart, 0, 0,
> "Restart child process automatically if it dies.\n",
> 0,
> diff --git a/configure.ac b/configure.ac
> index a4cd8e8..6613980 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -423,6 +423,38 @@ if test "$ac_cv_so_rcvtimeo_works" = no ||
> fi
> LIBS="${save_LIBS}"
>
> +# Check if the OS supports TCP_KEEP(CNT|IDLE|INTVL) socket options
> +save_LIBS="${LIBS}"
> +LIBS="${LIBS} ${NET_LIBS}"
> +AC_CACHE_CHECK([for TCP_KEEP(CNT|IDLE|INTVL) socket options],
> + [ac_cv_tcp_keep_works],
> + [AC_RUN_IFELSE(
> + [AC_LANG_PROGRAM([[
> +#include <stdio.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <netinet/in.h>
> +#include <netinet/tcp.h>
> + ]],[[
> +int s = socket(AF_INET, SOCK_STREAM, 0);
> +int i;
> +i = 5;
> +if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &i, sizeof i))
> + return (1);
> +if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &i, sizeof i))
> + return (1);
> +if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &i, sizeof i))
> + return (1);
> +return (0);
> + ]])],
> + [ac_cv_tcp_keep_works=yes],
> + [ac_cv_tcp_keep_works=no])
> + ])
> +if test "$ac_cv_tcp_keep_works" = yes; then
> + AC_DEFINE([TCP_KEEP_WORKS], [1], [Define if TCP_KEEP* works])
> +fi
> +LIBS="${save_LIBS}"
> +
> # Run-time directory
> VARNISH_STATE_DIR='${localstatedir}/varnish'
> AC_SUBST(VARNISH_STATE_DIR)
> diff --git a/doc/sphinx/installation/platformnotes.rst
> b/doc/sphinx/installation/platformnotes.rst
> index 3ad486c..e1720b6 100644
> --- a/doc/sphinx/installation/platformnotes.rst
> +++ b/doc/sphinx/installation/platformnotes.rst
> @@ -35,3 +35,18 @@ Reduce the maximum stack size by running::
>
> in the Varnish startup script.
>
> +TCP keep-alive configuration
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +
> +On platforms other than Linux, Varnish is not able to set the TCP
> +keep-alive values per socket, and therefore the tcp_keepalive_* Varnish
> +runtime parameters are not available. On these platforms it can be
> +beneficial to tune the system-wide values for these in order to more
> +reliably detect remote close for sessions spending a long time on
> +waitinglists. This will help free up resources faster.
> +
> +On Linux the defaults are set to:
> +
> + tcp_keepalive_time = 600 seconds
> + tcp_keepalive_probes = 5
> + tcp_keepalive_intvl = 5 seconds
> --
> 1.7.10.4
>
>
> _______________________________________________
> varnish-dev mailing list
> varnish-dev at varnish-cache.org
> https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://www.varnish-cache.org/lists/pipermail/varnish-dev/attachments/20130211/5c12845d/attachment-0001.html>
More information about the varnish-dev
mailing list