[6.0] ca928ab3d vca: Have one inheritance check per listen socket
Martin Blix Grydeland
martin at varnish-software.com
Fri Nov 19 14:58:12 UTC 2021
commit ca928ab3d0766759c4b3aa56d307733fcdae5abc
Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
Date: Mon Sep 27 14:53:56 2021 +0200
vca: Have one inheritance check per listen socket
Instead of having a single global check that all acceptors may race
towards, this check now happens on a per listen socket basis. For
sockets with a different inheritance behavior on a single system, we
avoid having the first connection dictate what may be inherited by
a connection socket from its listen socket for all the other listen
addresses.
At least on Linux, Unix-domain sockets DO NOT inherit options like
SO_{RCV,SND}TIMEO even though TCP sockets do. On the off chance that
even sockets of the same family could behave differently, like for
example a regular vs a loopback TCP session, this is done on a per
listen address basis. To avoid cache-acceptor coordination with the
acceptor worker threads of a given listen address, workers will
individually perform this check once and for all when the first
connection is accepted.
We also stay defensive in the event of a parameter change, just in
case a previous test would assume inheritance because the Varnish
parameter value would match the kernel default value. Once a mismatch
is observed for a given connection with a given socket, the inheritance
test is no longer performed needlessly for this combination.
A race still exists between acceptors from different thread pools for
a given listen address, but this race is identical to the previous one
based on the former global need_test variable.
Although the inheritance check leaks into struct listen_sock, it is
opaque so everything can remain contained inside cache_acceptor.c.
Some aspects of this change (including the clarification comments) are
from @mbgrydeland.
Refs #2722
diff --git a/bin/varnishd/cache/cache_acceptor.c b/bin/varnishd/cache/cache_acceptor.c
index 94d32b1f2..b217a5863 100644
--- a/bin/varnishd/cache/cache_acceptor.c
+++ b/bin/varnishd/cache/cache_acceptor.c
@@ -91,11 +91,13 @@ static struct sock_opt {
int level;
int optname;
const char *strname;
- int need;
+ unsigned mod;
socklen_t sz;
union sock_arg arg[1];
} sock_opts[] = {
-#define SOCK_OPT(lvl, nam, typ) { lvl, nam, #nam, 0, sizeof(typ) },
+ /* Note: Setting the mod counter to something not-zero is needed
+ * to force the setsockopt() calls on startup */
+#define SOCK_OPT(lvl, nam, typ) { lvl, nam, #nam, 1, sizeof(typ) },
SOCK_OPT(SOL_SOCKET, SO_LINGER, struct linger)
SOCK_OPT(SOL_SOCKET, SO_KEEPALIVE, int)
@@ -115,6 +117,11 @@ static struct sock_opt {
static const int n_sock_opts = sizeof sock_opts / sizeof sock_opts[0];
+struct conn_heritage {
+ unsigned sess_set;
+ unsigned listen_mod;
+};
+
/*--------------------------------------------------------------------
* We want to get out of any kind of trouble-hit TCP connections as fast
* as absolutely possible, so we set them LINGER disabled, so that even if
@@ -137,8 +144,6 @@ static const unsigned enable_so_keepalive = 1;
static const unsigned enable_tcp_nodelay = 1;
-static unsigned need_test;
-
/*--------------------------------------------------------------------
* lacking a better place, we put some generic periodic updates
* into the vca_acct() loop which we are running anyway
@@ -189,9 +194,8 @@ vca_sock_opt_init(void)
tmp.fld = (val); \
if (memcmp(&so->arg->fld, &(tmp.fld), sz)) { \
memcpy(&so->arg->fld, &(tmp.fld), sz); \
- so->need = 1; \
+ so->mod++; \
chg = 1; \
- need_test = 1; \
} \
} \
} while (0)
@@ -218,6 +222,7 @@ vca_sock_opt_init(void)
static void
vca_sock_opt_test(const struct listen_sock *ls, const struct sess *sp)
{
+ struct conn_heritage *ch;
struct sock_opt *so;
union sock_arg tmp;
socklen_t l;
@@ -228,14 +233,16 @@ vca_sock_opt_test(const struct listen_sock *ls, const struct sess *sp)
for (n = 0; n < n_sock_opts; n++) {
so = &sock_opts[n];
+ ch = &ls->conn_heritage[n];
+ if (ch->sess_set)
+ continue; /* Already set, no need to retest */
if (so->level == IPPROTO_TCP && ls->uds)
continue;
- so->need = 1;
memset(&tmp, 0, sizeof tmp);
l = so->sz;
i = getsockopt(sp->fd, so->level, so->optname, &tmp, &l);
- if (i == 0 && !memcmp(&tmp, so->arg, so->sz))
- so->need = 0;
+ if (i == 0 && memcmp(&tmp, so->arg, so->sz))
+ ch->sess_set = 1;
if (i && errno != ENOPROTOOPT)
VTCP_Assert(i);
}
@@ -244,6 +251,7 @@ vca_sock_opt_test(const struct listen_sock *ls, const struct sess *sp)
static void
vca_sock_opt_set(const struct listen_sock *ls, const struct sess *sp)
{
+ struct conn_heritage *ch;
struct sock_opt *so;
int n, sock;
@@ -253,12 +261,17 @@ vca_sock_opt_set(const struct listen_sock *ls, const struct sess *sp)
for (n = 0; n < n_sock_opts; n++) {
so = &sock_opts[n];
+ ch = &ls->conn_heritage[n];
if (so->level == IPPROTO_TCP && ls->uds)
continue;
- if (so->need || sp == NULL) {
- VTCP_Assert(setsockopt(sock,
- so->level, so->optname, so->arg, so->sz));
- }
+ if (sp == NULL && ch->listen_mod == so->mod)
+ continue;
+ if (sp != NULL && !ch->sess_set)
+ continue;
+ VTCP_Assert(setsockopt(sock,
+ so->level, so->optname, so->arg, so->sz));
+ if (sp == NULL)
+ ch->listen_mod = so->mod;
}
}
@@ -418,9 +431,9 @@ vca_make_session(struct worker *wrk, void *arg)
vca_pace_good();
wrk->stats->sess_conn++;
- if (need_test) {
+ if (wa->acceptlsock->test_heritage) {
vca_sock_opt_test(wa->acceptlsock, sp);
- need_test = 0;
+ wa->acceptlsock->test_heritage = 0;
}
vca_sock_opt_set(wa->acceptlsock, sp);
@@ -599,6 +612,17 @@ vca_acct(void *arg)
continue; // VCA_Shutdown
assert (ls->sock > 0);
vca_sock_opt_set(ls, NULL);
+ /* If one of the options on a socket has
+ * changed, also force a retest of whether
+ * the values are inherited to the
+ * accepted sockets. This should then
+ * catch any false positives from previous
+ * tests that could happen if the set
+ * value of an option happened to just be
+ * the OS default for that value, and
+ * wasn't actually inherited from the
+ * listening socket. */
+ ls->test_heritage = 1;
}
AZ(pthread_mutex_unlock(&shut_mtx));
}
@@ -637,6 +661,11 @@ ccf_start(struct cli *cli, const char * const *av, void *priv)
ls->endpoint, strerror(errno));
return;
}
+ AZ(ls->conn_heritage);
+ ls->conn_heritage = calloc(n_sock_opts,
+ sizeof *ls->conn_heritage);
+ AN(ls->conn_heritage);
+ ls->test_heritage = 1;
vca_sock_opt_set(ls, NULL);
if (cache_param->accept_filter) {
int i;
@@ -648,8 +677,6 @@ ccf_start(struct cli *cli, const char * const *av, void *priv)
}
}
- need_test = 1;
-
AZ(pthread_create(&VCA_thread, NULL, vca_acct, NULL));
}
diff --git a/bin/varnishd/common/heritage.h b/bin/varnishd/common/heritage.h
index db1dd37d6..3d0157133 100644
--- a/bin/varnishd/common/heritage.h
+++ b/bin/varnishd/common/heritage.h
@@ -36,6 +36,7 @@ struct listen_sock;
struct transport;
struct VCLS;
struct uds_perms;
+struct conn_heritage;
struct listen_sock {
unsigned magic;
@@ -49,6 +50,8 @@ struct listen_sock {
struct suckaddr *addr;
const struct transport *transport;
const struct uds_perms *perms;
+ unsigned test_heritage;
+ struct conn_heritage *conn_heritage;
};
VTAILQ_HEAD(listen_sock_head, listen_sock);
diff --git a/bin/varnishtest/tests/r02722.vtc b/bin/varnishtest/tests/r02722.vtc
new file mode 100644
index 000000000..e8a4eef61
--- /dev/null
+++ b/bin/varnishtest/tests/r02722.vtc
@@ -0,0 +1,37 @@
+varnishtest "TCP vs UDS sockopt inheritance"
+
+feature cmd {test $(uname) != SunOS}
+
+server s0 {
+ rxreqhdrs
+ non_fatal
+ rxreqbody
+ txresp
+} -dispatch
+
+varnish v1 -arg {-a :0 -a "${tmpdir}/v1.sock"}
+varnish v1 -cliok "param.set debug +flush_head"
+varnish v1 -cliok "param.set timeout_idle 1"
+varnish v1 -vcl+backend { } -start
+
+client c1 {
+ txreq -req POST -hdr "Content-Length: 100"
+ send some
+ rxresp
+ expect resp.status == 503
+}
+
+# This run performs the inheritance test on a TCP connection
+client c1 -run
+
+# This run relies on the inheritance test results
+client c1 -run
+
+varnish v1 -vsl_catchup
+
+# This run checks that TCP results aren't applied to a UDS
+client c1 -connect "${tmpdir}/v1.sock"
+client c1 -run
+
+# And finally this run checks UDS inheritance test results
+client c1 -run
More information about the varnish-commit
mailing list