[master] 6e22b8158 Basic "via" backends support

Nils Goroll nils.goroll at uplex.de
Mon Feb 20 15:38:06 UTC 2023


commit 6e22b8158347cd3f8bd24a65d447038959fa6686
Author: Nils Goroll <nils.goroll at uplex.de>
Date:   Fri Mar 22 15:50:07 2019 +0100

    Basic "via" backends support
    
    In varnish-cache, the deliberate decision has been made not to
    support TLS from the same address space as varnish itself; see
    doc/sphinx/phk/ssl_again.rst
    
    So the obvious way to connect to TLS backends is to use a TLS
    "onloader" (a term coined by @slimhazard as the opposite of an
    "offloader"), which turns a clear-text connection into a TLS
    connection.
    
    Before this change, this required additional configuration in two
    places: an address/port or UDS path had to be uniquely allocated
    for each destination address, the specific onloader configuration
    had to be put in place, and a varnish backend pointing to the
    onloader had to be added - all of this for each individual
    backend. This requirement also prevented any use of dynamic
    backends with a TLS onloader.
    
    haproxy, however, offers a convenient and elegant way to avoid this
    configuration overhead: the PROXY protocol can also be used to
    transport the destination address which haproxy is to connect to
    when a server's address is unspecified (INADDR_ANY / 0.0.0.0). The
    configuration template for this use case looks like this (a huge
    thank you to @wtarreau for pointing out this great option in
    haproxy):
    
    listen clear-to-ssl
            bind /my/path/to/ssl_onloader accept-proxy
            balance roundrobin
            stick-table type ip size 100
            stick on dst
            server s0 0.0.0.0:443 ssl ca-file /etc/ssl/certs/ca-bundle.crt
            server s1 0.0.0.0:443 ssl ca-file /etc/ssl/certs/ca-bundle.crt
            server s2 0.0.0.0:443 ssl ca-file /etc/ssl/certs/ca-bundle.crt
            # .. approximately as many servers as expected peers
            # for improved tls session caching
    
    With this setup, by connecting to /my/path/to/ssl_onloader and
    sending the address to make a TLS connection to in a PROXY header
    (as the server address / port), we can reduce the configuration
    overhead outside varnish substantially. In particular, we no
    longer require a path / port per destination, and dynamic TLS
    backends become possible.
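
    To make the mechanism concrete, the sketch below (illustration
    only, not part of this commit; the destination 198.51.100.7:443
    and the example request are made-up placeholders) does by hand
    what varnish will do for a "via" backend: connect to the
    onloader's UDS and announce the real TLS destination in a PROXY
    header - v1 here for readability, while varnish sends v2 - before
    speaking plain HTTP:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <sys/un.h>

    int
    main(void)
    {
            struct sockaddr_un sun;
            const char *req = "GET / HTTP/1.1\r\nHost: example.org\r\n\r\n";
            char hdr[128];
            int fd;

            fd = socket(AF_UNIX, SOCK_STREAM, 0);
            if (fd < 0)
                    return (1);
            memset(&sun, 0, sizeof sun);
            sun.sun_family = AF_UNIX;
            strncpy(sun.sun_path, "/my/path/to/ssl_onloader",
                sizeof sun.sun_path - 1);
            if (connect(fd, (struct sockaddr *)&sun, sizeof sun) != 0)
                    return (1);

            /* The TLS destination travels in the PROXY header's server
             * address / port fields; the client side is a bogo address. */
            snprintf(hdr, sizeof hdr,
                "PROXY TCP4 0.0.0.0 198.51.100.7 0 443\r\n");
            (void)write(fd, hdr, strlen(hdr));

            /* From here on, plain HTTP; haproxy wraps it in TLS. */
            (void)write(fd, req, strlen(req));
            close(fd);
            return (0);
    }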
    
    This patch implements the basis for a simple means of configuring
    such an ssl onloader:
    
    Backends can be created with an additional "via" director, which
    has to resolve to a simple backend. The connection is then made to
    that backend's address, and the actual endpoint address is sent in
    an additional PROXY header. Note that the existing option of
    sending yet another PROXY header to the actual backend is
    unaffected. Despite using the same format, the two PROXY headers
    are semantically different: the first, here coined the "preamble",
    carries the address to make the connection to, while the (optional)
    second PROXY header continues to contain the addresses of the
    connection to varnish.
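
    At the VRT level, the change boils down to one extra VCL_BACKEND
    argument. The hypothetical helper below (the function name and the
    pre-resolved suckaddr argument are assumptions, modelled on
    dyn_dir_init() in vmod_debug_dyn.c further down) sketches how a
    VMOD could create a backend for "sa" whose connections are made
    through the "via" director:

    /* Sketch only, not actual tree code: create a backend for "sa"
     * whose connections go through the "via" director. */
    #include <sys/socket.h>

    #include "cache/cache.h"        /* VRT_CTX, VCL_BACKEND, VRT_new_backend() */
    #include "vsa.h"                /* VSA_Get_Proto() */

    static VCL_BACKEND
    new_via_backend(VRT_CTX, const char *vcl_name,
        const struct suckaddr *sa, VCL_BACKEND via)
    {
            struct vrt_endpoint vep[1];
            struct vrt_backend vrt[1];

            INIT_OBJ(vep, VRT_ENDPOINT_MAGIC);
            INIT_OBJ(vrt, VRT_BACKEND_MAGIC);
            vrt->endpoint = vep;
            vrt->vcl_name = vcl_name;

            /* "via" is only supported for IP endpoints, so no uds_path here */
            if (VSA_Get_Proto(sa) == AF_INET6)
                    vep->ipv6 = sa;
            else
                    vep->ipv4 = sa;

            /*
             * The new third argument: if non-NULL, the connection is
             * made to the via backend's address and "sa" is announced
             * in the PROXY preamble instead.
             */
            return (VRT_new_backend(ctx, vrt, via));
    }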
    
    Future improvements on the roadmap:
    
    * Resolution of the "via" backend at the time the connection is made:
      This will allow for fault tolerance and load balancing of via
      backends.
    
    * Cascade the health check: If the "via" backend is probed / set down,
      any backends using it could be set unhealthy also.
    
    * Timeouts: The "via" backend's timeouts could define maximum values
      for any connections made through it.
    
    Trivia:
    
    To future Varnish-Cache historians: this patch originates from #2850
    and went through three more iterations, making it a likely candidate
    for the PR with the longest turnaround time, at 1543 days.

diff --git a/bin/varnishd/cache/cache_backend.c b/bin/varnishd/cache/cache_backend.c
index 4802c05fc..f4722729e 100644
--- a/bin/varnishd/cache/cache_backend.c
+++ b/bin/varnishd/cache/cache_backend.c
@@ -37,16 +37,19 @@
 #include <stdlib.h>
 
 #include "cache_varnishd.h"
+#include "cache_director.h"
 
 #include "vsa.h"
 #include "vtcp.h"
 #include "vtim.h"
+#include "vsa.h"
 
 #include "cache_backend.h"
 #include "cache_conn_pool.h"
 #include "cache_transport.h"
 #include "cache_vcl.h"
 #include "http1/cache_http1.h"
+#include "proxy/cache_proxy.h"
 
 #include "VSC_vbe.h"
 
@@ -560,17 +563,87 @@ VRT_backend_vsm_need(VRT_CTX)
 	return (VRT_VSC_Overhead(VSC_vbe_size));
 }
 
+/*
+ * The new_backend via parameter is a VCL_BACKEND, but we need a (struct
+ * backend)
+ *
+ * For now, we resolve it when creating the backend, which implies no redundancy
+ * / load balancing across the via director if it is more than a simple backend.
+ */
+
+static const struct backend *
+via_resolve(VRT_CTX, const struct vrt_endpoint *vep, VCL_BACKEND via)
+{
+	const struct backend *viabe = NULL;
+
+	AN(vep);
+	AN(via);
+
+	if (vep->uds_path) {
+		VRT_fail(ctx, "Via is only supported for IP addresses");
+		return (NULL);
+	}
+
+	via = VRT_DirectorResolve(ctx, via);
+
+	if (via != NULL &&
+	    (via->vdir->methods == vbe_methods ||
+	     via->vdir->methods == vbe_methods_noprobe))
+		CAST_OBJ_NOTNULL(viabe, via->priv, BACKEND_MAGIC);
+
+	if (viabe == NULL)
+		VRT_fail(ctx, "Via does not resolve to a backend");
+
+	return (viabe);
+}
+
+/*
+ * construct a new endpoint identical to vep with sa in a proxy header
+ */
+static struct vrt_endpoint *
+via_endpoint(const struct vrt_endpoint *vep, const struct suckaddr *sa)
+{
+	struct vsb *preamble;
+	struct vrt_blob blob[1];
+	struct vrt_endpoint *nvep, *ret;
+	const struct suckaddr *client_bogo;
+
+	AN(vep);
+	AN(sa);
+
+	nvep = VRT_Endpoint_Clone(vep);
+	AN(nvep);
+
+	if (VSA_Get_Proto(sa) == AF_INET6)
+		client_bogo = bogo_ip6;
+	else
+		client_bogo = bogo_ip;
+
+	preamble = VSB_new_auto();
+	AN(preamble);
+	VPX_Format_Proxy(preamble, 2, client_bogo, sa, NULL);
+	blob->blob = VSB_data(preamble);
+	blob->len = VSB_len(preamble);
+	nvep->preamble = blob;
+	ret = VRT_Endpoint_Clone(nvep);
+	VSB_destroy(&preamble);
+	free(nvep);
+
+	return (ret);
+}
+
 VCL_BACKEND
 VRT_new_backend_clustered(VRT_CTX, struct vsmw_cluster *vc,
-    const struct vrt_backend *vrt)
+    const struct vrt_backend *vrt, VCL_BACKEND via)
 {
 	struct backend *be;
 	struct vcl *vcl;
 	const struct vrt_backend_probe *vbp;
 	const struct vrt_endpoint *vep;
 	const struct vdi_methods *m;
-	const struct suckaddr *sa;
+	const struct suckaddr *sa = NULL;
 	char abuf[VTCP_ADDRBUFSIZE];
+	const struct backend *viabe = NULL;
 
 	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
 	CHECK_OBJ_NOTNULL(vrt, VRT_BACKEND_MAGIC);
@@ -585,6 +658,12 @@ VRT_new_backend_clustered(VRT_CTX, struct vsmw_cluster *vc,
 		assert(vep->ipv4== NULL && vep->ipv6== NULL);
 	}
 
+	if (via) {
+		viabe = via_resolve(ctx, vep, via);
+		if (viabe == NULL)
+			return (NULL);
+	}
+
 	vcl = ctx->vcl;
 	AN(vcl);
 	AN(vrt->vcl_name);
@@ -594,14 +673,13 @@ VRT_new_backend_clustered(VRT_CTX, struct vsmw_cluster *vc,
 	if (be == NULL)
 		return (NULL);
 
-	vep = be->endpoint = VRT_Endpoint_Clone(vep);
 #define DA(x)	do { if (vrt->x != NULL) REPLACE((be->x), (vrt->x)); } while (0)
 #define DN(x)	do { be->x = vrt->x; } while (0)
 	VRT_BACKEND_HANDLE();
 #undef DA
 #undef DN
 
-	if (be->hosthdr == NULL) {
+	if (viabe || be->hosthdr == NULL) {
 		if (vrt->endpoint->uds_path != NULL)
 			sa = bogo_ip;
 		else if (cache_param->prefer_ipv6 && vep->ipv6 != NULL)
@@ -610,8 +688,10 @@ VRT_new_backend_clustered(VRT_CTX, struct vsmw_cluster *vc,
 			sa = vep->ipv4;
 		else
 			sa = vep->ipv6;
-		VTCP_name(sa, abuf, sizeof abuf, NULL, 0);
-		REPLACE(be->hosthdr, abuf);
+		if (be->hosthdr == NULL) {
+			VTCP_name(sa, abuf, sizeof abuf, NULL, 0);
+			REPLACE(be->hosthdr, abuf);
+		}
 	}
 
 	be->vsc = VSC_vbe_New(vc, &be->vsc_seg,
@@ -620,6 +700,12 @@ VRT_new_backend_clustered(VRT_CTX, struct vsmw_cluster *vc,
 	if (! vcl->temp->is_warm)
 		VRT_VSC_Hide(be->vsc_seg);
 
+	if (viabe)
+		vep = be->endpoint = via_endpoint(viabe->endpoint, sa);
+	else
+		vep = be->endpoint = VRT_Endpoint_Clone(vep);
+
+	AN(vep);
 	be->conn_pool = VCP_Ref(vep, vbe_proto_ident);
 	AN(be->conn_pool);
 
@@ -652,12 +738,12 @@ VRT_new_backend_clustered(VRT_CTX, struct vsmw_cluster *vc,
 }
 
 VCL_BACKEND
-VRT_new_backend(VRT_CTX, const struct vrt_backend *vrt)
+VRT_new_backend(VRT_CTX, const struct vrt_backend *vrt, VCL_BACKEND via)
 {
 
 	CHECK_OBJ_NOTNULL(vrt, VRT_BACKEND_MAGIC);
 	CHECK_OBJ_NOTNULL(vrt->endpoint, VRT_ENDPOINT_MAGIC);
-	return (VRT_new_backend_clustered(ctx, NULL, vrt));
+	return (VRT_new_backend_clustered(ctx, NULL, vrt, via));
 }
 
 /*--------------------------------------------------------------------
diff --git a/bin/varnishtest/tests/d00007.vtc b/bin/varnishtest/tests/d00007.vtc
index 2c05693dc..4198e3fb3 100644
--- a/bin/varnishtest/tests/d00007.vtc
+++ b/bin/varnishtest/tests/d00007.vtc
@@ -7,32 +7,84 @@ server s1 {
 	accept
 	rxreq
 	txresp
+	close
+	accept
+	rxreq
+	txresp
+	close
+	accept
+	rxreq
+	txresp
+} -start
+
+# the use case for via-proxy is to have a(n ha)proxy make a(n ssl)
+# connection on our behalf. For the purpose of testing, we use another
+# varnish in place - but we are behaving realistically in that we do
+# not use any prior information for the actual backend connection -
+# just the information from the proxy protocol
+
+varnish v2 -proto PROXY -vcl {
+	import debug;
+	import std;
+
+	backend dummy { .host = "${bad_backend}"; }
+
+	sub vcl_init {
+		new s1 = debug.dyn("0.0.0.0", "0");
+	}
+
+	sub vcl_recv {
+		s1.refresh(server.ip, std.port(server.ip));
+		set req.backend_hint = s1.backend();
+		return (pass);
+	}
 } -start
 
+#
+# we vtc.sleep to make sure that the health check is done and server
+# s1 has accepted again. We would rather want to use barriers, but
+# there is a (yet not understood) bug in varnishtest which prevents
+# the bX_sock macros from being available in the second varnish
+# instance
+
 varnish v1 -vcl {
 	import debug;
+	import vtc;
 
 	backend dummy { .host = "${bad_backend}"; }
 
 	probe pr {
 		.threshold = 8;
 		.initial = 8;
+		.interval = 1m;
 	}
 
+	backend v2 { .host = "${v2_addr}"; .port = "${v2_port}"; }
+
 	sub vcl_init {
 		new s1 = debug.dyn("0.0.0.0", "0");
 	}
 
 	sub vcl_recv {
-		s1.refresh("${s1_addr}", "${s1_port}", pr);
+		if (req.url == "/1") {
+			s1.refresh("${s1_addr}", "${s1_port}", pr);
+			vtc.sleep(1s);
+		} else if (req.url == "/2") {
+			s1.refresh("${s1_addr}", "${s1_port}", pr,
+			    via=v2);
+			vtc.sleep(1s);
+		}
 		set req.backend_hint = s1.backend();
 	}
 } -start
 
-varnish v1 -expect MAIN.n_backend == 2
+varnish v1 -expect MAIN.n_backend == 3
 
 client c1 {
-	txreq
+	txreq -url /1
+	rxresp
+	expect resp.status == 200
+	txreq -url /2
 	rxresp
 	expect resp.status == 200
 } -run
diff --git a/include/vrt.h b/include/vrt.h
index 8242c63ef..4f524e4a5 100644
--- a/include/vrt.h
+++ b/include/vrt.h
@@ -61,6 +61,8 @@
  *	VXID is 64 bit
  *	[cache.h] http_GetRange() changed
  *	exp_close added to struct vrt_backend_probe
+ *	VRT_new_backend() signature changed
+ *	VRT_new_backend_clustered() signature changed
  * 16.0 (2022-09-15)
  *	VMOD C-prototypes moved into JSON
  *	VRT_AddVDP() deprecated
@@ -613,9 +615,9 @@ struct vrt_backend_probe {
 };
 
 /* Backend related */
-VCL_BACKEND VRT_new_backend(VRT_CTX, const struct vrt_backend *);
+VCL_BACKEND VRT_new_backend(VRT_CTX, const struct vrt_backend *, VCL_BACKEND);
 VCL_BACKEND VRT_new_backend_clustered(VRT_CTX,
-    struct vsmw_cluster *, const struct vrt_backend *);
+    struct vsmw_cluster *, const struct vrt_backend *, VCL_BACKEND);
 size_t VRT_backend_vsm_need(VRT_CTX);
 void VRT_delete_backend(VRT_CTX, VCL_BACKEND *);
 struct vrt_endpoint *VRT_Endpoint_Clone(const struct vrt_endpoint *vep);
diff --git a/lib/libvcc/vcc_backend.c b/lib/libvcc/vcc_backend.c
index f9d31ce8c..ad694f11d 100644
--- a/lib/libvcc/vcc_backend.c
+++ b/lib/libvcc/vcc_backend.c
@@ -606,7 +606,7 @@ vcc_ParseHostDef(struct vcc *tl, const struct token *t_be, const char *vgcname)
 	ifp = New_IniFin(tl);
 	VSB_printf(ifp->ini,
 	    "\t%s =\n\t    VRT_new_backend_clustered(ctx, vsc_cluster,\n"
-	    "\t\t&vgc_dir_priv_%s);\n",
+	    "\t\t&vgc_dir_priv_%s, NULL);\n",
 	    vgcname, vgcname);
 	VSB_printf(ifp->ini,
 	    "\tif (%s)\n\t\tVRT_StaticDirector(%s);", vgcname, vgcname);
diff --git a/vmod/vmod_debug.c b/vmod/vmod_debug.c
index a94ae3ab3..8d400256e 100644
--- a/vmod/vmod_debug.c
+++ b/vmod/vmod_debug.c
@@ -577,7 +577,7 @@ create_cold_backend(VRT_CTX)
 	INIT_OBJ(be, VRT_BACKEND_MAGIC);
 	be->endpoint = vep;
 	be->vcl_name = "doomed";
-	return (VRT_new_backend(ctx, be));
+	return (VRT_new_backend(ctx, be, NULL));
 }
 
 static int
diff --git a/vmod/vmod_debug.vcc b/vmod/vmod_debug.vcc
index ced0725e9..fbeebd214 100644
--- a/vmod/vmod_debug.vcc
+++ b/vmod/vmod_debug.vcc
@@ -152,7 +152,7 @@ $Function BOOL fail2()
 
 Function to fail vcl code. Always returns true.
 
-$Object dyn(STRING addr, STRING port, PROBE probe=0)
+$Object dyn(STRING addr, STRING port, PROBE probe=0, BACKEND via=0)
 
 Dynamically create a single-backend director, addr and port must not be empty.
 
@@ -160,7 +160,7 @@ $Method BACKEND .backend()
 
 Return the dynamic backend.
 
-$Method VOID .refresh(STRING addr, STRING port, PROBE probe=0)
+$Method VOID .refresh(STRING addr, STRING port, PROBE probe=0, BACKEND via=0)
 
 Dynamically refresh & (always!) replace the backend by a new one.
 
diff --git a/vmod/vmod_debug_dyn.c b/vmod/vmod_debug_dyn.c
index 2f1a1d180..24685cc83 100644
--- a/vmod/vmod_debug_dyn.c
+++ b/vmod/vmod_debug_dyn.c
@@ -62,7 +62,7 @@ struct xyzzy_debug_dyn_uds {
 
 static void
 dyn_dir_init(VRT_CTX, struct xyzzy_debug_dyn *dyn,
-     VCL_STRING addr, VCL_STRING port, VCL_PROBE probe)
+    VCL_STRING addr, VCL_STRING port, VCL_PROBE probe, VCL_BACKEND via)
 {
 	const struct suckaddr *sa;
 	VCL_BACKEND dir, dir2;
@@ -72,6 +72,7 @@ dyn_dir_init(VRT_CTX, struct xyzzy_debug_dyn *dyn,
 	CHECK_OBJ_NOTNULL(dyn, VMOD_DEBUG_DYN_MAGIC);
 	XXXAN(addr);
 	XXXAN(port);
+	CHECK_OBJ_ORNULL(via, DIRECTOR_MAGIC);
 
 	INIT_OBJ(&vep, VRT_ENDPOINT_MAGIC);
 	INIT_OBJ(&vrt, VRT_BACKEND_MAGIC);
@@ -89,7 +90,7 @@ dyn_dir_init(VRT_CTX, struct xyzzy_debug_dyn *dyn,
 	else
 		WRONG("Wrong proto family");
 
-	dir = VRT_new_backend(ctx, &vrt);
+	dir = VRT_new_backend(ctx, &vrt, via);
 	AN(dir);
 
 	/*
@@ -110,7 +111,8 @@ dyn_dir_init(VRT_CTX, struct xyzzy_debug_dyn *dyn,
 
 VCL_VOID
 xyzzy_dyn__init(VRT_CTX, struct xyzzy_debug_dyn **dynp,
-    const char *vcl_name, VCL_STRING addr, VCL_STRING port, VCL_PROBE probe)
+    const char *vcl_name, VCL_STRING addr, VCL_STRING port, VCL_PROBE probe,
+    VCL_BACKEND via)
 {
 	struct xyzzy_debug_dyn *dyn;
 
@@ -132,7 +134,7 @@ xyzzy_dyn__init(VRT_CTX, struct xyzzy_debug_dyn **dynp,
 
 	AZ(pthread_mutex_init(&dyn->mtx, NULL));
 
-	dyn_dir_init(ctx, dyn, addr, port, probe);
+	dyn_dir_init(ctx, dyn, addr, port, probe, via);
 	XXXAN(dyn->dir);
 	*dynp = dyn;
 }
@@ -160,11 +162,11 @@ xyzzy_dyn_backend(VRT_CTX, struct xyzzy_debug_dyn *dyn)
 
 VCL_VOID
 xyzzy_dyn_refresh(VRT_CTX, struct xyzzy_debug_dyn *dyn,
-    VCL_STRING addr, VCL_STRING port, VCL_PROBE probe)
+    VCL_STRING addr, VCL_STRING port, VCL_PROBE probe, VCL_BACKEND via)
 {
 	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
 	CHECK_OBJ_NOTNULL(dyn, VMOD_DEBUG_DYN_MAGIC);
-	dyn_dir_init(ctx, dyn, addr, port, probe);
+	dyn_dir_init(ctx, dyn, addr, port, probe, via);
 }
 
 static int
@@ -207,7 +209,8 @@ dyn_uds_init(VRT_CTX, struct xyzzy_debug_dyn_uds *uds, VCL_STRING path)
 	vep.ipv4 = NULL;
 	vep.ipv6 = NULL;
 
-	if ((dir = VRT_new_backend(ctx, &vrt)) == NULL)
+	// we support via: uds -> ip, but not via: ip -> uds
+	if ((dir = VRT_new_backend(ctx, &vrt, NULL)) == NULL)
 		return (-1);
 
 	AZ(pthread_mutex_lock(&uds->mtx));

