[6.0] 02b7e718f holddown times for certain backend connection errors

Dridi Boukelmoune dridi.boukelmoune at gmail.com
Thu Aug 16 08:53:11 UTC 2018


commit 02b7e718f4616978b6e1f66ffa4f53948c74f0d4
Author: Nils Goroll <nils.goroll at uplex.de>
Date:   Tue Jun 5 10:13:44 2018 +0200

    holddown times for certain backend connection errors
    
    This is similar to the vca pace: Depending on the backend connection
    error, it does not make sense to re-try in rapid succession, instead
    not attempting the failed connection again for some time will save
    resources both locally and remotely, where applicable, and should
    thus help improve the overall situation.
    
    Fixes #2622

diff --git a/bin/varnishd/VSC_vbe.vsc b/bin/varnishd/VSC_vbe.vsc
index b21590965..5212119cd 100644
--- a/bin/varnishd/VSC_vbe.vsc
+++ b/bin/varnishd/VSC_vbe.vsc
@@ -124,4 +124,13 @@
 	:level: diag
 	:oneliner:	Connections failed for other reason
 
+.. varnish_vsc:: helddown
+	:type:	counter
+	:level: diag
+	:oneliner:	Connection opens not attempted
+
+	Connections not attempted during the backend_local_error_holddown
+	or backend_remote_error_holddown interval after a fundamental
+	connection issue.
+
 .. varnish_vsc_end::	vbe
diff --git a/bin/varnishd/cache/cache_tcp_pool.c b/bin/varnishd/cache/cache_tcp_pool.c
index f5fa2a365..d79273586 100644
--- a/bin/varnishd/cache/cache_tcp_pool.c
+++ b/bin/varnishd/cache/cache_tcp_pool.c
@@ -101,6 +101,9 @@ struct conn_pool {
 	int					n_kill;
 
 	int					n_used;
+
+	double					holddown;
+	int					holddown_errno;
 };
 
 struct tcp_pool {
@@ -238,6 +241,7 @@ VCP_New(struct conn_pool *cp, const void *id, void *priv,
 	cp->priv = priv;
 	cp->methods = cm;
 	cp->refcnt = 1;
+	cp->holddown = 0;
 	Lck_New(&cp->mtx, lck_tcp_pool);
 	VTAILQ_INIT(&cp->connlist);
 	VTAILQ_INIT(&cp->killlist);
@@ -375,32 +379,59 @@ VCP_Recycle(const struct worker *wrk, struct pfd **pfdp)
  */
 
 static int
-VCP_Open(const struct conn_pool *cp, double tmo, const void **privp,
+VCP_Open(struct conn_pool *cp, double tmo, const void **privp,
     struct VSC_vbe *vsc)
 {
 	int r;
+	double h;
 
 	CHECK_OBJ_NOTNULL(cp, CONN_POOL_MAGIC);
 
+	while (cp->holddown > 0) {
+		Lck_Lock(&cp->mtx);
+		if (cp->holddown == 0) {
+			Lck_Unlock(&cp->mtx);
+			break;
+		}
+
+		if (VTIM_mono() >= cp->holddown) {
+			cp->holddown = 0;
+			Lck_Unlock(&cp->mtx);
+			break;
+		}
+
+		if (vsc)
+			vsc->helddown++;
+		errno = cp->holddown_errno;
+		Lck_Unlock(&cp->mtx);
+		return (-1);
+	}
+
 	r = cp->methods->open(cp, tmo, privp);
 
 	if (r >= 0 || vsc == NULL)
 		return (r);
 
+	h = 0;
+
 	/* stats access unprotected */
 	switch (errno) {
 	case EACCES:
 	case EPERM:
 		vsc->fail_eacces++;
+		h = cache_param->backend_local_error_holddown;
 		break;
 	case EADDRNOTAVAIL:
 		vsc->fail_eaddrnotavail++;
+		h = cache_param->backend_local_error_holddown;
 		break;
 	case ECONNREFUSED:
 		vsc->fail_econnrefused++;
+		h = cache_param->backend_remote_error_holddown;
 		break;
 	case ENETUNREACH:
 		vsc->fail_enetunreach++;
+		h = cache_param->backend_remote_error_holddown;
 		break;
 	case ETIMEDOUT:
 		vsc->fail_etimedout++;
@@ -410,6 +441,18 @@ VCP_Open(const struct conn_pool *cp, double tmo, const void **privp,
 	}
 	vsc->fail++;
 
+	if (h == 0)
+		return (r);
+
+	Lck_Lock(&cp->mtx);
+	h += VTIM_mono();
+	if (cp->holddown == 0 || h < cp->holddown) {
+		cp->holddown = h;
+		cp->holddown_errno = errno;
+	}
+
+	Lck_Unlock(&cp->mtx);
+
 	return (r);
 }
 
@@ -766,7 +809,7 @@ VTP_Rel(struct tcp_pool **tpp)
  */
 
 int
-VTP_Open(const struct tcp_pool *tp, double tmo, const void **privp,
+VTP_Open(struct tcp_pool *tp, double tmo, const void **privp,
     struct VSC_vbe *vsc)
 {
 	return (VCP_Open(tp->cp, tmo, privp, vsc));
diff --git a/bin/varnishd/cache/cache_tcp_pool.h b/bin/varnishd/cache/cache_tcp_pool.h
index 88b131e9a..9f640da73 100644
--- a/bin/varnishd/cache/cache_tcp_pool.h
+++ b/bin/varnishd/cache/cache_tcp_pool.h
@@ -72,7 +72,7 @@ void VTP_Rel(struct tcp_pool **);
 	 * the pool is destroyed and all cached connections closed.
 	 */
 
-int VTP_Open(const struct tcp_pool *, double tmo, const void **,
+int VTP_Open(struct tcp_pool *, double tmo, const void **,
     struct VSC_vbe *);
 	/*
 	 * Open a new connection and return the adress used.
diff --git a/include/tbl/params.h b/include/tbl/params.h
index e2ecff4c8..054b2f449 100644
--- a/include/tbl/params.h
+++ b/include/tbl/params.h
@@ -285,6 +285,45 @@ PARAM(
 	/* func */	NULL
 )
 
+PARAM(
+	/* name */	backend_local_error_holddown,
+	/* typ */	timeout,
+	/* min */	"0.000",
+	/* max */	NULL,
+	/* default */	"10.000",
+	/* units */	"seconds",
+	/* flags */	EXPERIMENTAL,
+	/* s-text */
+	"When connecting to backends, certain error codes "
+	"(EADDRNOTAVAIL, EACCES, EPERM) signal a local resource shortage "
+	"or configuration issue for which retrying connection attempts "
+	"may worsen the situation due to the complexity of the operations "
+	"involved in the kernel.\n"
+	"This parameter prevents repeated connection attempts for the "
+	"configured duration.",
+	/* l-text */	"",
+	/* func */	NULL
+)
+
+PARAM(
+	/* name */	backend_remote_error_holddown,
+	/* typ */	timeout,
+	/* min */	"0.000",
+	/* max */	NULL,
+	/* default */	"0.250",
+	/* units */	"seconds",
+	/* flags */	EXPERIMENTAL,
+	/* s-text */
+	"When connecting to backends, certain error codes (ECONNREFUSED, "
+	"ENETUNREACH) signal fundamental connection issues such as the backend "
+	"not accepting connections or routing problems for which repeated "
+	"connection attempts are considered useless.\n"
+	"This parameter prevents repeated connection attempts for the "
+	"configured duration.",
+	/* l-text */	"",
+	/* func */	NULL
+)
+
 PARAM(
 	/* name */	cli_limit,
 	/* typ */	bytes_u,


More information about the varnish-commit mailing list