[master] 9c6f6043f vca: Have one inheritance check per listen socket

Nils Goroll nils.goroll at uplex.de
Tue Nov 23 12:31:11 UTC 2021


commit 9c6f6043fc6a7920100f495793790e7fb311ee89
Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
Date:   Mon Sep 27 14:53:56 2021 +0200

    vca: Have one inheritance check per listen socket
    
    Instead of having a single global check that all acceptors may race
    towards, this check now happens on a per listen socket basis. For
    sockets with a different inheritance behavior on a single system, we
    avoid having the first connection dictate what may be inherited by
    a connection socket from its listen socket for all the other listen
    addresses.
    
    At least on Linux, Unix-domain sockets DO NOT inherit options like
    SO_{RCV,SND}TIMEO even though TCP sockets do. On the off chance that
    even sockets of the same family could behave differently, like for
    example a regular vs a loopback TCP session, this is done on a per
    listen address basis. To avoid cache-acceptor coordination with the
    acceptor worker threads of a given listen address, workers will
    individually perform this check once and for all when the first
    connection is accepted.
    
    We also stay defensive in the event of a parameter change, just in
    case a previous test would assume inheritance because the Varnish
    parameter value would match the kernel default value. Once a mismatch
    is observed for a given connection with a given socket, the inheritance
    test is no longer performed needlessly for this combination.
    
    A race still exists between acceptors from different thread pools for
    a given listen address, but this race is identical to the previous one
    based on the former global need_test variable.
    
    Although the inheritance check leaks into struct listen_sock, it is
    opaque so everything can remain contained inside cache_acceptor.c.
    
    Some aspects of this change (including the clarification comments) are
    from @mbgrydeland.
    
    Refs #2722

diff --git a/bin/varnishd/cache/cache_acceptor.c b/bin/varnishd/cache/cache_acceptor.c
index a15461a54..d00725c0b 100644
--- a/bin/varnishd/cache/cache_acceptor.c
+++ b/bin/varnishd/cache/cache_acceptor.c
@@ -90,11 +90,13 @@ static struct sock_opt {
 	int		level;
 	int		optname;
 	const char	*strname;
-	int		need;
+	unsigned	mod;
 	socklen_t	sz;
 	union sock_arg	arg[1];
 } sock_opts[] = {
-#define SOCK_OPT(lvl, nam, typ) { lvl, nam, #nam, 0, sizeof(typ) },
+	/* Note: Setting the mod counter to something not-zero is needed
+	 * to force the setsockopt() calls on startup */
+#define SOCK_OPT(lvl, nam, typ) { lvl, nam, #nam, 1, sizeof(typ) },
 
 	SOCK_OPT(SOL_SOCKET, SO_LINGER, struct linger)
 	SOCK_OPT(SOL_SOCKET, SO_KEEPALIVE, int)
@@ -117,6 +119,11 @@ static struct sock_opt {
 
 static const int n_sock_opts = sizeof sock_opts / sizeof sock_opts[0];
 
+struct conn_heritage {
+	unsigned	sess_set;
+	unsigned	listen_mod;
+};
+
 /*--------------------------------------------------------------------
  * We want to get out of any kind of trouble-hit TCP connections as fast
  * as absolutely possible, so we set them LINGER disabled, so that even if
@@ -139,8 +146,6 @@ static const unsigned enable_so_keepalive = 1;
 
 static const unsigned enable_tcp_nodelay = 1;
 
-static unsigned		need_test;
-
 /*--------------------------------------------------------------------
  * lacking a better place, we put some generic periodic updates
  * into the vca_acct() loop which we are running anyway
@@ -191,9 +196,8 @@ vca_sock_opt_init(void)
 			tmp.fld = (val);				\
 			if (memcmp(&so->arg->fld, &(tmp.fld), sz)) {	\
 				memcpy(&so->arg->fld, &(tmp.fld), sz);	\
-				so->need = 1;				\
+				so->mod++;				\
 				chg = 1;				\
-				need_test = 1;				\
 			}						\
 		}							\
 	} while (0)
@@ -224,6 +228,7 @@ vca_sock_opt_init(void)
 static void
 vca_sock_opt_test(const struct listen_sock *ls, const struct sess *sp)
 {
+	struct conn_heritage *ch;
 	struct sock_opt *so;
 	union sock_arg tmp;
 	socklen_t l;
@@ -234,14 +239,16 @@ vca_sock_opt_test(const struct listen_sock *ls, const struct sess *sp)
 
 	for (n = 0; n < n_sock_opts; n++) {
 		so = &sock_opts[n];
+		ch = &ls->conn_heritage[n];
+		if (ch->sess_set)
+			continue; /* Already set, no need to retest */
 		if (so->level == IPPROTO_TCP && ls->uds)
 			continue;
-		so->need = 1;
 		memset(&tmp, 0, sizeof tmp);
 		l = so->sz;
 		i = getsockopt(sp->fd, so->level, so->optname, &tmp, &l);
-		if (i == 0 && !memcmp(&tmp, so->arg, so->sz))
-			so->need = 0;
+		if (i == 0 && memcmp(&tmp, so->arg, so->sz))
+			ch->sess_set = 1;
 		if (i && errno != ENOPROTOOPT)
 			VTCP_Assert(i);
 	}
@@ -250,6 +257,7 @@ vca_sock_opt_test(const struct listen_sock *ls, const struct sess *sp)
 static void
 vca_sock_opt_set(const struct listen_sock *ls, const struct sess *sp)
 {
+	struct conn_heritage *ch;
 	struct sock_opt *so;
 	int n, sock;
 
@@ -259,12 +267,17 @@ vca_sock_opt_set(const struct listen_sock *ls, const struct sess *sp)
 
 	for (n = 0; n < n_sock_opts; n++) {
 		so = &sock_opts[n];
+		ch = &ls->conn_heritage[n];
 		if (so->level == IPPROTO_TCP && ls->uds)
 			continue;
-		if (so->need || sp == NULL) {
-			VTCP_Assert(setsockopt(sock,
-			    so->level, so->optname, so->arg, so->sz));
-		}
+		if (sp == NULL && ch->listen_mod == so->mod)
+			continue;
+		if  (sp != NULL && !ch->sess_set)
+			continue;
+		VTCP_Assert(setsockopt(sock,
+		    so->level, so->optname, so->arg, so->sz));
+		if (sp == NULL)
+			ch->listen_mod = so->mod;
 	}
 }
 
@@ -410,9 +423,9 @@ vca_make_session(struct worker *wrk, void *arg)
 	vca_pace_good();
 	wrk->stats->sess_conn++;
 
-	if (need_test) {
+	if (wa->acceptlsock->test_heritage) {
 		vca_sock_opt_test(wa->acceptlsock, sp);
-		need_test = 0;
+		wa->acceptlsock->test_heritage = 0;
 	}
 	vca_sock_opt_set(wa->acceptlsock, sp);
 
@@ -607,6 +620,17 @@ vca_acct(void *arg)
 					continue;	// VCA_Shutdown
 				assert (ls->sock > 0);
 				vca_sock_opt_set(ls, NULL);
+				/* If one of the options on a socket has
+				 * changed, also force a retest of whether
+				 * the values are inherited to the
+				 * accepted sockets. This should then
+				 * catch any false positives from previous
+				 * tests that could happen if the set
+				 * value of an option happened to just be
+				 * the OS default for that value, and
+				 * wasn't actually inherited from the
+				 * listening socket. */
+				ls->test_heritage = 1;
 			}
 			AZ(pthread_mutex_unlock(&shut_mtx));
 		}
@@ -642,6 +666,11 @@ ccf_start(struct cli *cli, const char * const *av, void *priv)
 			    ls->endpoint, VAS_errtxt(errno));
 			return;
 		}
+		AZ(ls->conn_heritage);
+		ls->conn_heritage = calloc(n_sock_opts,
+		    sizeof *ls->conn_heritage);
+		AN(ls->conn_heritage);
+		ls->test_heritage = 1;
 		vca_sock_opt_set(ls, NULL);
 		if (cache_param->accept_filter && VTCP_filter_http(ls->sock))
 			VSL(SLT_Error, 0,
@@ -649,8 +678,6 @@ ccf_start(struct cli *cli, const char * const *av, void *priv)
 			    ls->sock, errno, VAS_errtxt(errno));
 	}
 
-	need_test = 1;
-
 	AZ(pthread_create(&VCA_thread, NULL, vca_acct, NULL));
 }
 
diff --git a/bin/varnishd/common/heritage.h b/bin/varnishd/common/heritage.h
index f2b2e448f..8244f8046 100644
--- a/bin/varnishd/common/heritage.h
+++ b/bin/varnishd/common/heritage.h
@@ -37,6 +37,7 @@ struct listen_sock;
 struct transport;
 struct VCLS;
 struct uds_perms;
+struct conn_heritage;
 
 struct listen_sock {
 	unsigned			magic;
@@ -50,6 +51,8 @@ struct listen_sock {
 	struct suckaddr			*addr;
 	const struct transport		*transport;
 	const struct uds_perms		*perms;
+	unsigned			test_heritage;
+	struct conn_heritage		*conn_heritage;
 };
 
 VTAILQ_HEAD(listen_sock_head, listen_sock);
diff --git a/bin/varnishtest/tests/r02722.vtc b/bin/varnishtest/tests/r02722.vtc
new file mode 100644
index 000000000..e8a4eef61
--- /dev/null
+++ b/bin/varnishtest/tests/r02722.vtc
@@ -0,0 +1,37 @@
+varnishtest "TCP vs UDS sockopt inheritance"
+
+feature cmd {test $(uname) != SunOS}
+
+server s0 {
+        rxreqhdrs
+        non_fatal
+        rxreqbody
+        txresp
+} -dispatch
+
+varnish v1 -arg {-a :0 -a "${tmpdir}/v1.sock"}
+varnish v1 -cliok "param.set debug +flush_head"
+varnish v1 -cliok "param.set timeout_idle 1"
+varnish v1 -vcl+backend { } -start
+
+client c1 {
+        txreq -req POST -hdr "Content-Length: 100"
+        send some
+        rxresp
+        expect resp.status == 503
+}
+
+# This run performs the inheritance test on a TCP connection
+client c1 -run
+
+# This run relies on the inheritance test results
+client c1 -run
+
+varnish v1 -vsl_catchup
+
+# This run checks that TCP results aren't applied to a UDS
+client c1 -connect "${tmpdir}/v1.sock"
+client c1 -run
+
+# And finally this run checks UDS inheritance test results
+client c1 -run


More information about the varnish-commit mailing list