[master] e02e31e holddown times for certain backend connection errors
Nils Goroll
nils.goroll at uplex.de
Mon Jun 11 08:29:10 UTC 2018
commit e02e31ed2ad9a58fdee60628a9c4450bd1266179
Author: Nils Goroll <nils.goroll at uplex.de>
Date: Tue Jun 5 10:13:44 2018 +0200
holddown times for certain backend connection errors
This is similar to the vca pace: depending on the backend connection
error, it does not make sense to retry in rapid succession. Instead,
not attempting the failed connection again for some time saves
resources both locally and remotely, where applicable, and should
thus help improve the overall situation.
Fixes #2622
diff --git a/bin/varnishd/VSC_vbe.vsc b/bin/varnishd/VSC_vbe.vsc
index b215909..5212119 100644
--- a/bin/varnishd/VSC_vbe.vsc
+++ b/bin/varnishd/VSC_vbe.vsc
@@ -124,4 +124,13 @@
:level: diag
:oneliner: Connections failed for other reason
+.. varnish_vsc:: helddown
+ :type: counter
+ :level: diag
+ :oneliner: Connection opens not attempted
+
+ Connections not attempted during the backend_local_error_holddown
+ or backend_remote_error_holddown interval after a fundamental
+ connection issue.
+
.. varnish_vsc_end:: vbe
diff --git a/bin/varnishd/cache/cache_tcp_pool.c b/bin/varnishd/cache/cache_tcp_pool.c
index f5fa2a3..d792735 100644
--- a/bin/varnishd/cache/cache_tcp_pool.c
+++ b/bin/varnishd/cache/cache_tcp_pool.c
@@ -101,6 +101,9 @@ struct conn_pool {
int n_kill;
int n_used;
+
+ double holddown;
+ int holddown_errno;
};
struct tcp_pool {
@@ -238,6 +241,7 @@ VCP_New(struct conn_pool *cp, const void *id, void *priv,
cp->priv = priv;
cp->methods = cm;
cp->refcnt = 1;
+ cp->holddown = 0;
Lck_New(&cp->mtx, lck_tcp_pool);
VTAILQ_INIT(&cp->connlist);
VTAILQ_INIT(&cp->killlist);
@@ -375,32 +379,59 @@ VCP_Recycle(const struct worker *wrk, struct pfd **pfdp)
*/
static int
-VCP_Open(const struct conn_pool *cp, double tmo, const void **privp,
+VCP_Open(struct conn_pool *cp, double tmo, const void **privp,
struct VSC_vbe *vsc)
{
int r;
+ double h;
CHECK_OBJ_NOTNULL(cp, CONN_POOL_MAGIC);
+ while (cp->holddown > 0) {
+ Lck_Lock(&cp->mtx);
+ if (cp->holddown == 0) {
+ Lck_Unlock(&cp->mtx);
+ break;
+ }
+
+ if (VTIM_mono() >= cp->holddown) {
+ cp->holddown = 0;
+ Lck_Unlock(&cp->mtx);
+ break;
+ }
+
+ if (vsc)
+ vsc->helddown++;
+ errno = cp->holddown_errno;
+ Lck_Unlock(&cp->mtx);
+ return (-1);
+ }
+
r = cp->methods->open(cp, tmo, privp);
if (r >= 0 || vsc == NULL)
return (r);
+ h = 0;
+
/* stats access unprotected */
switch (errno) {
case EACCES:
case EPERM:
vsc->fail_eacces++;
+ h = cache_param->backend_local_error_holddown;
break;
case EADDRNOTAVAIL:
vsc->fail_eaddrnotavail++;
+ h = cache_param->backend_local_error_holddown;
break;
case ECONNREFUSED:
vsc->fail_econnrefused++;
+ h = cache_param->backend_remote_error_holddown;
break;
case ENETUNREACH:
vsc->fail_enetunreach++;
+ h = cache_param->backend_remote_error_holddown;
break;
case ETIMEDOUT:
vsc->fail_etimedout++;
@@ -410,6 +441,18 @@ VCP_Open(const struct conn_pool *cp, double tmo, const void **privp,
}
vsc->fail++;
+ if (h == 0)
+ return (r);
+
+ Lck_Lock(&cp->mtx);
+ h += VTIM_mono();
+ if (cp->holddown == 0 || h < cp->holddown) {
+ cp->holddown = h;
+ cp->holddown_errno = errno;
+ }
+
+ Lck_Unlock(&cp->mtx);
+
return (r);
}
@@ -766,7 +809,7 @@ VTP_Rel(struct tcp_pool **tpp)
*/
int
-VTP_Open(const struct tcp_pool *tp, double tmo, const void **privp,
+VTP_Open(struct tcp_pool *tp, double tmo, const void **privp,
struct VSC_vbe *vsc)
{
return (VCP_Open(tp->cp, tmo, privp, vsc));
diff --git a/bin/varnishd/cache/cache_tcp_pool.h b/bin/varnishd/cache/cache_tcp_pool.h
index 88b131e..9f640da 100644
--- a/bin/varnishd/cache/cache_tcp_pool.h
+++ b/bin/varnishd/cache/cache_tcp_pool.h
@@ -72,7 +72,7 @@ void VTP_Rel(struct tcp_pool **);
* the pool is destroyed and all cached connections closed.
*/
-int VTP_Open(const struct tcp_pool *, double tmo, const void **,
+int VTP_Open(struct tcp_pool *, double tmo, const void **,
struct VSC_vbe *);
/*
* Open a new connection and return the adress used.
diff --git a/include/tbl/params.h b/include/tbl/params.h
index e2ecff4..054b2f4 100644
--- a/include/tbl/params.h
+++ b/include/tbl/params.h
@@ -286,6 +286,45 @@ PARAM(
)
PARAM(
+ /* name */ backend_local_error_holddown,
+ /* typ */ timeout,
+ /* min */ "0.000",
+ /* max */ NULL,
+ /* default */ "10.000",
+ /* units */ "seconds",
+ /* flags */ EXPERIMENTAL,
+ /* s-text */
+ "When connecting to backends, certain error codes "
+ "(EADDRNOTAVAIL, EACCES, EPERM) signal a local resource shortage "
+ "or configuration issue for which retrying connection attempts "
+ "may worsen the situation due to the complexity of the operations "
+ "involved in the kernel.\n"
+ "This parameter prevents repeated connection attempts for the "
+ "configured duration.",
+ /* l-text */ "",
+ /* func */ NULL
+)
+
+PARAM(
+ /* name */ backend_remote_error_holddown,
+ /* typ */ timeout,
+ /* min */ "0.000",
+ /* max */ NULL,
+ /* default */ "0.250",
+ /* units */ "seconds",
+ /* flags */ EXPERIMENTAL,
+ /* s-text */
+ "When connecting to backends, certain error codes (ECONNREFUSED, "
+ "ENETUNREACH) signal fundamental connection issues such as the backend "
+ "not accepting connections or routing problems for which repeated "
+ "connection attempts are considered useless.\n"
+ "This parameter prevents repeated connection attempts for the "
+ "configured duration.",
+ /* l-text */ "",
+ /* func */ NULL
+)
+
+PARAM(
/* name */ cli_limit,
/* typ */ bytes_u,
/* min */ "128b",
More information about the varnish-commit
mailing list