r2936 - trunk/varnish-cache/bin/varnishd

phk at projects.linpro.no phk at projects.linpro.no
Fri Jul 11 21:49:21 CEST 2008


Author: phk
Date: 2008-07-11 21:49:20 +0200 (Fri, 11 Jul 2008)
New Revision: 2936

Modified:
   trunk/varnish-cache/bin/varnishd/cache_backend.c
   trunk/varnish-cache/bin/varnishd/cache_backend.h
   trunk/varnish-cache/bin/varnishd/cache_backend_cfg.c
   trunk/varnish-cache/bin/varnishd/heritage.h
   trunk/varnish-cache/bin/varnishd/mgt_param.c
Log:
Drop pseudo-automatic support for multihomed backends and require
clear expression of intent in VCL.

We now fail backend hostnames that resolve to multiple IPv4 or multiple
IPv6 addresses, in other words, you cannot use "cnn.com" as a backend
hostname specification without the compiler yelling at you:

    % ./varnishd -d -d -b cnn.com -a :8080
    Backend host "cnn.com": resolves to multiple IPv4 addresses.
    Only one address is allowed.
    Please specify which exact address you want to use, we found these:
	    64.236.16.20
	    64.236.16.52
	    64.236.24.12
	    64.236.29.120
    [...]
    VCL compilation failed

However, you _can_ use a hostname that resolves to both an IPv4 and
an IPv6 address, and the new parameter "prefer_ipv6" will determine
which one we try first in such cases.

The other part of this change is that we now do the DNS lookup at
VCL compile time, and only then.

If your backend's DNS record (or /etc/hosts entry) changes IP#, you
must reload your VCL code to notify varnish.

Finer technical points:

We build a bytestring representation of the sockaddrs in VCC and
include them in the concept of backend identity: for an existing
backend (+ connections) to be reused for a new VCL, the backend must
now be defined exactly the same way AND have the same resolved
IPv4/IPv6 addresses.

Since we never muck about with the address in the backend struct
anymore, it is static for the life of the struct backend instance,
so we can simplify and eliminate the locking dance around our
connection attempts.

Also eliminate the struct vrt_backend inclusion in struct backend,
and instead make the relevant fields full-blown members of struct
backend.  This eliminates a number of TRUST_ME() calls.

This is the companion commit to #2934 which prepared the VCL compiler.


Modified: trunk/varnish-cache/bin/varnishd/cache_backend.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_backend.c	2008-07-11 18:57:13 UTC (rev 2935)
+++ trunk/varnish-cache/bin/varnishd/cache_backend.c	2008-07-11 19:49:20 UTC (rev 2936)
@@ -41,11 +41,9 @@
 #include <poll.h>
 
 #include <sys/socket.h>
-#include <netdb.h>
 
 #include "shmlog.h"
 #include "cache.h"
-#include "vrt.h"
 #include "cache_backend.h"
 
 /*
@@ -70,7 +68,7 @@
 	CHECK_OBJ_NOTNULL(sp->bereq->http, HTTP_MAGIC);
 	CHECK_OBJ_NOTNULL(sp->backend, BACKEND_MAGIC);
 	http_PrintfHeader(sp->wrk, sp->fd, sp->bereq->http,
-	    "Host: %s", sp->backend->vrt->hostname);
+	    "Host: %s", sp->backend->hosthdr);
 }
 
 /*--------------------------------------------------------------------
@@ -83,11 +81,8 @@
  */
 
 static int
-VBE_TryConnect(const struct sess *sp, const struct addrinfo *ai)
+VBE_TryConnect(const struct sess *sp, int pf, const struct sockaddr *sa, socklen_t salen)
 {
-	struct sockaddr_storage ss;
-	int fam, sockt, proto;
-	socklen_t alen;
 	int s, i, tmo;
 	char abuf1[TCP_ADDRBUFSIZE], abuf2[TCP_ADDRBUFSIZE];
 	char pbuf1[TCP_PORTBUFSIZE], pbuf2[TCP_PORTBUFSIZE];
@@ -95,47 +90,31 @@
 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
 	CHECK_OBJ_NOTNULL(sp->backend, BACKEND_MAGIC);
 
-	/*
-	 * ai is only valid with the lock held, so copy out the bits
-	 * we need to make the connection
-	 */
-	fam = ai->ai_family;
-	sockt = ai->ai_socktype;
-	proto = ai->ai_protocol;
-	alen = ai->ai_addrlen;
-	assert(alen <= sizeof ss);
-	memcpy(&ss, ai->ai_addr, alen);
-
-	/* release lock during stuff that can take a long time */
-	UNLOCK(&sp->backend->mtx);
-
-	s = socket(fam, sockt, proto);
+	s = socket(pf, SOCK_STREAM, 0);
 	if (s < 0) {
 		LOCK(&sp->backend->mtx);
 		return (s);
 	}
 
 	tmo = params->connect_timeout;
-	if (sp->backend->vrt->connect_timeout > 10e-3)
-		tmo = sp->backend->vrt->connect_timeout * 1000;
+	if (sp->backend->connect_timeout > 10e-3)
+		tmo = sp->backend->connect_timeout * 1000;
 
 	if (tmo > 0)
-		i = TCP_connect(s, (void *)&ss, alen, tmo);
+		i = TCP_connect(s, sa, salen, tmo);
 	else
-		i = connect(s, (void *)&ss, alen);
+		i = connect(s, sa, salen);
 
 	if (i != 0) {
 		AZ(close(s));
-		LOCK(&sp->backend->mtx);
 		return (-1);
 	}
 
 	TCP_myname(s, abuf1, sizeof abuf1, pbuf1, sizeof pbuf1);
-	TCP_name((void*)&ss, alen, abuf2, sizeof abuf2, pbuf2, sizeof pbuf2);
+	TCP_name(sa, salen, abuf2, sizeof abuf2, pbuf2, sizeof pbuf2);
 	WSL(sp->wrk, SLT_BackendOpen, s, "%s %s %s %s %s",
-	    sp->backend->vrt->vcl_name, abuf1, pbuf1, abuf2, pbuf2);
+	    sp->backend->vcl_name, abuf1, pbuf1, abuf2, pbuf2);
 
-	LOCK(&sp->backend->mtx);
 	return (s);
 }
 
@@ -254,54 +233,6 @@
 	}
 }
 
-/*--------------------------------------------------------------------
- * Try to get a socket connected to one of the addresses on the list.
- * We start from the cached "last good" address and try all items on
- * the list exactly once.
- *
- * Called with backend mutex held, but will release/acquire it.
- *
- * XXX: Not ready for DNS re-lookups
- */
-
-static int
-bes_conn_try_list(const struct sess *sp, struct backend *bp)
-{
-	struct addrinfo *ai, *from;
-	int s, loops;
-
-	CHECK_OBJ_NOTNULL(bp, BACKEND_MAGIC);
-
-	/* No addrinfo, no connection */
-	if (bp->ai == NULL)
-		return (-1);
-	AN(bp->last_ai);
-
-	/* Called with lock held */
-	loops = 0;
-	ai = from = bp->last_ai;
-	while (loops == 0 || ai != from) {
-
-		/* NB: releases/acquires backend lock */
-		s = VBE_TryConnect(sp, ai);
-
-		if (s >= 0) { 
-			bp->last_ai = ai;
-			return (s);
-		}
-
-		/* Try next one */
-		ai = ai->ai_next;
-		if (ai == NULL) {
-			loops++;
-			ai = bp->ai;
-		}
-	}
-	/* We have tried them all, fail */
-	return (-1);
-}
-
-
 /*--------------------------------------------------------------------*/
 
 static int
@@ -311,10 +242,25 @@
 
 	LOCK(&bp->mtx);
 	bp->refcount++;
-	s = bes_conn_try_list(sp, bp);	/* releases/acquires backend lock */
-	if (s < 0)
+	UNLOCK(&sp->backend->mtx);
+
+	s = -1;
+	assert(bp->ipv6 != NULL || bp->ipv4 != NULL);
+
+	/* release lock during stuff that can take a long time */
+
+	if (params->prefer_ipv6 && bp->ipv6 != NULL)
+		s = VBE_TryConnect(sp, PF_INET6, bp->ipv6, bp->ipv6len);
+	if (s == -1 && bp->ipv4 != NULL)
+		s = VBE_TryConnect(sp, PF_INET, bp->ipv4, bp->ipv4len);
+	if (s == -1 && !params->prefer_ipv6 && bp->ipv6 != NULL)
+		s = VBE_TryConnect(sp, PF_INET6, bp->ipv6, bp->ipv6len);
+
+	if (s < 0) {
+		LOCK(&sp->backend->mtx);
 		bp->refcount--;		/* Only keep ref on success */
-	UNLOCK(&bp->mtx);
+		UNLOCK(&bp->mtx);
+	}
 	return (s);
 }
 
@@ -377,7 +323,7 @@
 	CHECK_OBJ_NOTNULL(vc->backend, BACKEND_MAGIC);
 	b = vc->backend;
 	assert(vc->fd >= 0);
-	WSL(w, SLT_BackendClose, vc->fd, "%s", vc->backend->vrt->vcl_name);
+	WSL(w, SLT_BackendClose, vc->fd, "%s", vc->backend->vcl_name);
 	i = close(vc->fd);
 	assert(i == 0 || errno == ECONNRESET || errno == ENOTCONN);
 	vc->fd = -1;
@@ -398,7 +344,7 @@
 	CHECK_OBJ_NOTNULL(vc->backend, BACKEND_MAGIC);
 	assert(vc->fd >= 0);
 	bp = vc->backend;
-	WSL(w, SLT_BackendReuse, vc->fd, "%s", vc->backend->vrt->vcl_name);
+	WSL(w, SLT_BackendReuse, vc->fd, "%s", vc->backend->vcl_name);
 	LOCK(&vc->backend->mtx);
 	VSL_stats->backend_recycle++;
 	VTAILQ_INSERT_HEAD(&bp->connlist, vc, list);

Modified: trunk/varnish-cache/bin/varnishd/cache_backend.h
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_backend.h	2008-07-11 18:57:13 UTC (rev 2935)
+++ trunk/varnish-cache/bin/varnishd/cache_backend.h	2008-07-11 19:49:20 UTC (rev 2936)
@@ -73,15 +73,21 @@
 	unsigned		magic;
 #define BACKEND_MAGIC		0x64c4c7c6
 
-	struct vrt_backend	vrt[1];
+	char			*hosthdr;
+	char			*ident;
+	char			*vcl_name;
+	double			connect_timeout;
+
 	uint32_t		hash;
 
 	VTAILQ_ENTRY(backend)	list;
 	int			refcount;
 	pthread_mutex_t		mtx;
 
-	struct addrinfo		*ai;
-	struct addrinfo		*last_ai;
+	struct sockaddr		*ipv4;
+	socklen_t		ipv4len;
+	struct sockaddr		*ipv6;
+	socklen_t		ipv6len;
 
 	VTAILQ_HEAD(, vbe_conn)	connlist;
 

Modified: trunk/varnish-cache/bin/varnishd/cache_backend_cfg.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_backend_cfg.c	2008-07-11 18:57:13 UTC (rev 2935)
+++ trunk/varnish-cache/bin/varnishd/cache_backend_cfg.c	2008-07-11 19:49:20 UTC (rev 2936)
@@ -41,7 +41,6 @@
 #include <poll.h>
 
 #include <sys/socket.h>
-#include <netdb.h>
 
 #include "shmlog.h"
 #include "cache.h"
@@ -99,9 +98,10 @@
 			AZ(close(vbe->fd));
 		VBE_ReleaseConn(vbe);
 	}
-	free(TRUST_ME(b->vrt->ident));
-	free(TRUST_ME(b->vrt->hostname));
-	free(TRUST_ME(b->vrt->portname));
+	free(b->ident);
+	free(b->hosthdr);
+	free(b->ipv4);
+	free(b->ipv6);
 	b->magic = 0;
 	free(b);
 	VSL_stats->n_backend--;
@@ -117,44 +117,17 @@
 	VBE_DropRefLocked(b);
 }
 
-/*--------------------------------------------------------------------
- * DNS lookup of backend host/port
- */
+/*--------------------------------------------------------------------*/
 
 static void
-vbe_dns_lookup(const struct cli *cli, struct backend *bp)
+copy_sockaddr(struct sockaddr **sa, socklen_t *len, const unsigned char *src)
 {
-	int error;
-	struct addrinfo *res, hint, *old;
 
-	CHECK_OBJ_NOTNULL(bp, BACKEND_MAGIC);
-
-	memset(&hint, 0, sizeof hint);
-	hint.ai_family = PF_UNSPEC;
-	hint.ai_socktype = SOCK_STREAM;
-	res = NULL;
-	error = getaddrinfo(bp->vrt->hostname, bp->vrt->portname,
-	    &hint, &res);
-	if (error) {
-		if (res != NULL)
-			freeaddrinfo(res);
-		/*
-		 * We cannot point to the source code any more, it may
-		 * be long gone from memory.   We already checked over in
-		 * the VCL compiler, so this is only relevant for refreshes.
-		 * XXX: which we do when exactly ?
-		 */
-		cli_out(cli, "DNS(/hosts) lookup failed for (%s/%s): %s",
-		    bp->vrt->hostname, bp->vrt->portname, gai_strerror(error));
-		return;
-	}
-	LOCK(&bp->mtx);
-	old = bp->ai;
-	bp->ai = res;
-	bp->last_ai = res;
-	UNLOCK(&bp->mtx);
-	if (old != NULL)
-		freeaddrinfo(old);
+	assert(*src > 0);
+	*sa = malloc(*src);
+	AN(*sa);
+	memcpy(*sa, src + 1, *src);
+	*len = *src;
 }
 
 /*--------------------------------------------------------------------
@@ -169,25 +142,46 @@
 	struct backend *b;
 	uint32_t u;
 
-	AN(vb->hostname);
-	AN(vb->portname);
 	AN(vb->ident);
+	assert(vb->ipv4_sockaddr != NULL || vb->ipv6_sockaddr != NULL);
 	(void)cli;
 	ASSERT_CLI();
-	u = crc32_l(vb->ident, strlen(vb->ident));
+
+	/* calculate a hash of (ident + ipv4_sockaddr + ipv6_sockaddr) */
+	u = crc32(~0U, vb->ident, strlen(vb->ident));
+	if (vb->ipv4_sockaddr != NULL)
+		u = crc32(u, vb->ipv4_sockaddr + 1, vb->ipv4_sockaddr[0]);
+	if (vb->ipv6_sockaddr != NULL)
+		u = crc32(u, vb->ipv6_sockaddr + 1, vb->ipv6_sockaddr[0]);
+
+	/* Run through the list and see if we already have this backend */
 	VTAILQ_FOREACH(b, &backends, list) {
 		CHECK_OBJ_NOTNULL(b, BACKEND_MAGIC);
 		if (u != b->hash)
 			continue;
-		if (strcmp(b->vrt->ident, vb->ident))
+		if (strcmp(b->ident, vb->ident))
 			continue;
+		if (vb->ipv4_sockaddr != NULL &&
+		    b->ipv4len != vb->ipv4_sockaddr[0])
+			continue;
+		if (vb->ipv6_sockaddr != NULL &&
+		    b->ipv6len != vb->ipv6_sockaddr[0])
+			continue;
+		if (b->ipv4len != 0 &&
+		    memcmp(b->ipv4, vb->ipv4_sockaddr + 1, b->ipv4len))
+			continue;
+		if (b->ipv6len != 0 &&
+		    memcmp(b->ipv6, vb->ipv6_sockaddr + 1, b->ipv6len))
+			continue;
 		b->refcount++;
 		return (b);
 	}
 
+	/* Create new backend */
 	ALLOC_OBJ(b, BACKEND_MAGIC);
 	XXXAN(b);
-	b->magic = BACKEND_MAGIC;
+	MTX_INIT(&b->mtx);
+	b->refcount = 1;
 
 	VTAILQ_INIT(&b->connlist);
 	b->hash = u;
@@ -196,17 +190,21 @@
 	 * This backend may live longer than the VCL that instantiated it
 	 * so we cannot simply reference the VCL's copy of things.
 	 */
-	REPLACE(b->vrt->ident, vb->ident);
-	REPLACE(b->vrt->hostname, vb->hostname);
-	REPLACE(b->vrt->portname, vb->portname);
-	REPLACE(b->vrt->vcl_name, vb->vcl_name);
+	REPLACE(b->ident, vb->ident);
+	REPLACE(b->vcl_name, vb->vcl_name);
+	REPLACE(b->hosthdr, vb->hostname);
 
-	b->vrt->connect_timeout = vb->connect_timeout;
+	b->connect_timeout = vb->connect_timeout;
 
-	MTX_INIT(&b->mtx);
-	b->refcount = 1;
+	/*
+	 * Copy over the sockaddrs
+	 */
+	if (vb->ipv4_sockaddr != NULL) 
+		copy_sockaddr(&b->ipv4, &b->ipv4len, vb->ipv4_sockaddr);
+	if (vb->ipv6_sockaddr != NULL) 
+		copy_sockaddr(&b->ipv6, &b->ipv6len, vb->ipv6_sockaddr);
 
-	vbe_dns_lookup(cli, b);
+	assert(b->ipv4 != NULL || b->ipv6 != NULL);
 
 	VTAILQ_INSERT_TAIL(&backends, b, list);
 	VSL_stats->n_backend++;
@@ -238,11 +236,9 @@
 	ASSERT_CLI();
 	VTAILQ_FOREACH(b, &backends, list) {
 		CHECK_OBJ_NOTNULL(b, BACKEND_MAGIC);
-		cli_out(cli, "%p %s/%s/%s %d\n",
+		cli_out(cli, "%p %s %d\n",
 		    b,
-		    b->vrt->vcl_name,
-		    b->vrt->hostname,
-		    b->vrt->portname,
+		    b->vcl_name,
 		    b->refcount);
 	}
 }

Modified: trunk/varnish-cache/bin/varnishd/heritage.h
===================================================================
--- trunk/varnish-cache/bin/varnishd/heritage.h	2008-07-11 18:57:13 UTC (rev 2935)
+++ trunk/varnish-cache/bin/varnishd/heritage.h	2008-07-11 19:49:20 UTC (rev 2936)
@@ -166,6 +166,9 @@
 
 	/* Log local socket address to shm */
 	unsigned		log_local_addr;
+
+	/* Prefer IPv6 connections to backend*/
+	unsigned		prefer_ipv6;
 };
 
 extern volatile struct params *params;

Modified: trunk/varnish-cache/bin/varnishd/mgt_param.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/mgt_param.c	2008-07-11 18:57:13 UTC (rev 2935)
+++ trunk/varnish-cache/bin/varnishd/mgt_param.c	2008-07-11 19:49:20 UTC (rev 2936)
@@ -735,6 +735,11 @@
 		"VCL can override this default value for each backend.",
 		0,
 		"400", "ms" },
+	{ "prefer_ipv6", tweak_bool, &master.prefer_ipv6, 0, 0,
+		"Prefer IPv6 address when connecting to backends which "
+		"have both IPv4 and IPv6 addresses.",
+		0,
+		"off", "bool" },
 	{ "session_linger", tweak_uint,
 		&master.session_linger,0, UINT_MAX,
 		"How long time the workerthread lingers on the session "




More information about the varnish-commit mailing list