[PATCH 1/2] Turn on SO_KEEPALIVE on all TCP connections.

Martin Blix Grydeland martin at varnish-software.com
Mon Feb 11 15:50:44 CET 2013


Ah, I wasn't aware of that. Thanks Andrea, I will update the docs to
reflect that.

-Martin

On Mon, Feb 11, 2013 at 3:20 PM, Andrea Campi
<andrea.campi at zephirworks.com> wrote:

> FreeBSD seems to also have these sockets options since this commit:
>
> http://svnweb.freebsd.org/base?view=revision&revision=232945
>
> At a quick glance, this went in for FreeBSD 9.1.
>
>
> On Mon, Feb 11, 2013 at 3:09 PM, Martin Blix Grydeland <
> martin at varnish-software.com> wrote:
>
>> This will help in determining remote hang up of the connection for
>> situations where we still are not able to send any reply, but freeing
>> the session will reduce resource overhead (e.g. when staying on
>> waitinglists for extended periods).
>>
>> On platforms that support it also add runtime parameters to control
>> the keep-alive packet settings through socket options. On platforms
>> that don't support these socket options, the values must be set system
>> wide.
>> ---
>>  bin/varnishd/cache/cache_acceptor.c       |  116
>> +++++++++++++++++++++++++++++
>>  bin/varnishd/common/params.h              |    5 ++
>>  bin/varnishd/mgt/mgt_param_tbl.c          |   20 +++++
>>  configure.ac                              |   32 ++++++++
>>  doc/sphinx/installation/platformnotes.rst |   15 ++++
>>  5 files changed, 188 insertions(+)
>>
>> diff --git a/bin/varnishd/cache/cache_acceptor.c
>> b/bin/varnishd/cache/cache_acceptor.c
>> index 62209a5..4e17dfb 100644
>> --- a/bin/varnishd/cache/cache_acceptor.c
>> +++ b/bin/varnishd/cache/cache_acceptor.c
>> @@ -70,8 +70,23 @@ static const struct linger linger = {
>>         .l_onoff        =       0,
>>  };
>>
>> +/*
>> + * We turn on keepalives by default to assist in detecting clients that
>> have
>> + * hung up on connections returning from waitinglists
>> + */
>> +static const int keepalive = 1;
>> +
>>  static unsigned char   need_sndtimeo, need_rcvtimeo, need_linger,
>> need_test,
>>                         need_tcpnodelay;
>> +static unsigned char   need_keepalive = 0;
>> +#ifdef TCP_KEEP_WORKS
>> +static unsigned char   need_ka_time = 0;
>> +static unsigned char   need_ka_probes = 0;
>> +static unsigned char   need_ka_intvl = 0;
>> +static int             ka_time = 0;
>> +static int             ka_probes = 0;
>> +static int             ka_intvl = 0;
>> +#endif
>>
>>  /*--------------------------------------------------------------------
>>   * Some kernels have bugs/limitations with respect to which options are
>> @@ -83,6 +98,10 @@ static void
>>  sock_test(int fd)
>>  {
>>         struct linger lin;
>> +       int tka;
>> +#ifdef TCP_KEEP_WORKS
>> +       int tka_time, tka_probes, tka_intvl;
>> +#endif
>>         struct timeval tv;
>>         socklen_t l;
>>         int i, tcp_nodelay;
>> @@ -97,6 +116,48 @@ sock_test(int fd)
>>         if (memcmp(&lin, &linger, l))
>>                 need_linger = 1;
>>
>> +       l = sizeof tka;
>> +       i = getsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &tka, &l);
>> +       if (i) {
>> +               VTCP_Assert(i);
>> +               return;
>> +       }
>> +       assert(l == sizeof tka);
>> +       if (tka != keepalive)
>> +               need_keepalive = 1;
>> +
>> +#ifdef TCP_KEEP_WORKS
>> +       l = sizeof tka_time;
>> +       i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &tka_time, &l);
>> +       if (i) {
>> +               VTCP_Assert(i);
>> +               return;
>> +       }
>> +       assert(l == sizeof tka_time);
>> +       if (tka_time != ka_time)
>> +               need_ka_time = 1;
>> +
>> +       l = sizeof tka_probes;
>> +       i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &tka_probes, &l);
>> +       if (i) {
>> +               VTCP_Assert(i);
>> +               return;
>> +       }
>> +       assert(l == sizeof tka_probes);
>> +       if (tka_probes != ka_probes)
>> +               need_ka_probes = 1;
>> +
>> +       l = sizeof tka_intvl;
>> +       i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &tka_intvl, &l);
>> +       if (i) {
>> +               VTCP_Assert(i);
>> +               return;
>> +       }
>> +       assert(l == sizeof tka_intvl);
>> +       if (tka_intvl != ka_intvl)
>> +               need_ka_intvl = 1;
>> +#endif
>> +
>>  #ifdef SO_SNDTIMEO_WORKS
>>         l = sizeof tv;
>>         i = getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &l);
>> @@ -281,6 +342,22 @@ VCA_SetupSess(struct worker *wrk, struct sess *sp)
>>         if (need_linger)
>>                 VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_LINGER,
>>                     &linger, sizeof linger));
>> +       if (need_keepalive)
>> +               VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_KEEPALIVE,
>> +                   &keepalive, sizeof keepalive));
>> +#ifdef TCP_KEEP_WORKS
>> +       AN(ka_time);
>> +       if (need_ka_time)
>> +               VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPIDLE,
>> +                       &ka_time, sizeof ka_time));
>> +       if (need_ka_probes)
>> +               VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPCNT,
>> +                       &ka_probes, sizeof ka_probes));
>> +       if (need_ka_intvl)
>> +               VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPINTVL,
>> +                       &ka_intvl, sizeof ka_intvl));
>> +#endif
>> +
>>  #ifdef SO_SNDTIMEO_WORKS
>>         if (need_sndtimeo)
>>                 VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_SNDTIMEO,
>> @@ -316,6 +393,12 @@ vca_acct(void *arg)
>>         THR_SetName("cache-acceptor");
>>         (void)arg;
>>
>> +#ifdef TCP_KEEP_WORKS
>> +       ka_time = cache_param->tcp_keepalive_time;
>> +       ka_probes = cache_param->tcp_keepalive_probes;
>> +       ka_intvl = cache_param->tcp_keepalive_intvl;
>> +#endif
>> +
>>         VTAILQ_FOREACH(ls, &heritage.socks, list) {
>>                 if (ls->sock < 0)
>>                         continue;
>> @@ -324,6 +407,16 @@ vca_acct(void *arg)
>>                     &linger, sizeof linger));
>>                 AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_NODELAY,
>>                     &tcp_nodelay, sizeof tcp_nodelay));
>> +               AZ(setsockopt(ls->sock, SOL_SOCKET, SO_KEEPALIVE,
>> +                   &keepalive, sizeof keepalive));
>> +#ifdef TCP_KEEP_WORKS
>> +               AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPIDLE,
>> +                   &ka_time, sizeof ka_time));
>> +               AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPCNT,
>> +                   &ka_probes, sizeof ka_probes));
>> +               AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPINTVL,
>> +                   &ka_intvl, sizeof ka_intvl));
>> +#endif
>>                 if (cache_param->accept_filter) {
>>                         i = VTCP_filter_http(ls->sock);
>>                         if (i)
>> @@ -339,6 +432,29 @@ vca_acct(void *arg)
>>         t0 = VTIM_real();
>>         while (1) {
>>                 (void)sleep(1);
>> +#ifdef TCP_KEEP_WORKS
>> +               if (cache_param->tcp_keepalive_time != ka_time ||
>> +                   cache_param->tcp_keepalive_probes != ka_probes ||
>> +                   cache_param->tcp_keepalive_intvl != ka_intvl) {
>> +                       need_test = 1;
>> +                       ka_time = cache_param->tcp_keepalive_time;
>> +                       ka_probes = cache_param->tcp_keepalive_probes;
>> +                       ka_intvl = cache_param->tcp_keepalive_intvl;
>> +                       VTAILQ_FOREACH(ls, &heritage.socks, list) {
>> +                               if (ls->sock < 0)
>> +                                       continue;
>> +                               AZ(setsockopt(ls->sock, IPPROTO_TCP,
>> +                                   TCP_KEEPIDLE,
>> +                                   &ka_time, sizeof ka_time));
>> +                               AZ(setsockopt(ls->sock, IPPROTO_TCP,
>> +                                   TCP_KEEPCNT,
>> +                                   &ka_probes, sizeof ka_probes));
>> +                               AZ(setsockopt(ls->sock, IPPROTO_TCP,
>> +                                   TCP_KEEPINTVL,
>> +                                   &ka_intvl, sizeof ka_intvl));
>> +                       }
>> +               }
>> +#endif
>>  #ifdef SO_SNDTIMEO_WORKS
>>                 if (cache_param->idle_send_timeout != send_timeout) {
>>                         need_test = 1;
>> diff --git a/bin/varnishd/common/params.h b/bin/varnishd/common/params.h
>> index a6e881b..6893461 100644
>> --- a/bin/varnishd/common/params.h
>> +++ b/bin/varnishd/common/params.h
>> @@ -110,6 +110,11 @@ struct params {
>>         unsigned                pipe_timeout;
>>         unsigned                send_timeout;
>>         unsigned                idle_send_timeout;
>> +#ifdef TCP_KEEP_WORKS
>> +       unsigned                tcp_keepalive_time;
>> +       unsigned                tcp_keepalive_probes;
>> +       unsigned                tcp_keepalive_intvl;
>> +#endif
>>
>>         /* Management hints */
>>         unsigned                auto_restart;
>> diff --git a/bin/varnishd/mgt/mgt_param_tbl.c
>> b/bin/varnishd/mgt/mgt_param_tbl.c
>> index 8601bae..0380a02 100644
>> --- a/bin/varnishd/mgt/mgt_param_tbl.c
>> +++ b/bin/varnishd/mgt/mgt_param_tbl.c
>> @@ -205,6 +205,26 @@ const struct parspec mgt_parspec[] = {
>>                 "See setsockopt(2) under SO_SNDTIMEO for more
>> information.",
>>                 DELAYED_EFFECT,
>>                 "60", "seconds" },
>> +#ifdef TCP_KEEP_WORKS
>> +       { "tcp_keepalive_time", tweak_timeout,
>> &mgt_param.tcp_keepalive_time,
>> +               1, 7200,
>> +               "The number of seconds a connection needs to be idle
>> before "
>> +               "TCP begins sending out keep-alive probes.",
>> +               0,
>> +               "600", "seconds" },
>> +       { "tcp_keepalive_probes", tweak_uint,
>> &mgt_param.tcp_keepalive_probes,
>> +               1, 100,
>> +               "The maximum number of TCP keep-alive probes to send
>> before "
>> +               "giving up and killing the connection if no response is "
>> +               "obtained from the other end.",
>> +               0,
>> +               "5", "probes" },
>> +       { "tcp_keepalive_intvl", tweak_timeout,
>> &mgt_param.tcp_keepalive_intvl,
>> +               1, 100,
>> +               "The number of seconds between TCP keep-alive probes.",
>> +               0,
>> +               "5", "seconds" },
>> +#endif
>>         { "auto_restart", tweak_bool, &mgt_param.auto_restart, 0, 0,
>>                 "Restart child process automatically if it dies.\n",
>>                 0,
>> diff --git a/configure.ac b/configure.ac
>> index a4cd8e8..6613980 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -423,6 +423,38 @@ if test "$ac_cv_so_rcvtimeo_works" = no ||
>>  fi
>>  LIBS="${save_LIBS}"
>>
>> +# Check if the OS supports TCP_KEEP(CNT|IDLE|INTVL) socket options
>> +save_LIBS="${LIBS}"
>> +LIBS="${LIBS} ${NET_LIBS}"
>> +AC_CACHE_CHECK([for TCP_KEEP(CNT|IDLE|INTVL) socket options],
>> +  [ac_cv_tcp_keep_works],
>> +  [AC_RUN_IFELSE(
>> +    [AC_LANG_PROGRAM([[
>> +#include <stdio.h>
>> +#include <sys/types.h>
>> +#include <sys/socket.h>
>> +#include <netinet/in.h>
>> +#include <netinet/tcp.h>
>> +    ]],[[
>> +int s = socket(AF_INET, SOCK_STREAM, 0);
>> +int i;
>> +i = 5;
>> +if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &i, sizeof i))
>> +  return (1);
>> +if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &i, sizeof i))
>> +  return (1);
>> +if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &i, sizeof i))
>> +  return (1);
>> +return (0);
>> +    ]])],
>> +    [ac_cv_tcp_keep_works=yes],
>> +    [ac_cv_tcp_keep_works=no])
>> +  ])
>> +if test "$ac_cv_tcp_keep_works" = yes; then
>> +   AC_DEFINE([TCP_KEEP_WORKS], [1], [Define if TCP_KEEP* works])
>> +fi
>> +LIBS="${save_LIBS}"
>> +
>>  # Run-time directory
>>  VARNISH_STATE_DIR='${localstatedir}/varnish'
>>  AC_SUBST(VARNISH_STATE_DIR)
>> diff --git a/doc/sphinx/installation/platformnotes.rst
>> b/doc/sphinx/installation/platformnotes.rst
>> index 3ad486c..e1720b6 100644
>> --- a/doc/sphinx/installation/platformnotes.rst
>> +++ b/doc/sphinx/installation/platformnotes.rst
>> @@ -35,3 +35,18 @@ Reduce the maximum stack size by running::
>>
>>  in the Varnish startup script.
>>
>> +TCP keep-alive configuration
>> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> +
>> +On platforms except Linux, Varnish is not able to set the TCP
>> +keep-alive values per socket, and therefore the tcp_keepalive_* Varnish
>> +runtime parameters are not available. On these platforms it can be
>> +beneficial to tune the system-wide values for these in order to more
>> +reliably detect remote close for sessions spending a long time on
>> +waitinglists. This will help free up resources faster.
>> +
>> +On Linux the defaults are set to:
>> +
>> +       tcp_keepalive_time = 600 seconds
>> +       tcp_keepalive_probes = 5
>> +       tcp_keepalive_intvl = 5 seconds
>> --
>> 1.7.10.4
>>
>>
>> _______________________________________________
>> varnish-dev mailing list
>> varnish-dev at varnish-cache.org
>> https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev
>>
>
>


-- 
<http://varnish-software.com>*Martin Blix Grydeland*
Senior Developer | Varnish Software AS
Cell: +47 21 98 92 60
We Make Websites Fly!
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://www.varnish-cache.org/lists/pipermail/varnish-dev/attachments/20130211/21df6c7b/attachment-0001.html>


More information about the varnish-dev mailing list