[master] 314b17c Statistics for backend open errors

Nils Goroll nils.goroll at uplex.de
Mon Jun 11 08:29:10 UTC 2018


commit 314b17cccd92b4328da06fa9bcea102cd6a1037e
Author: Nils Goroll <nils.goroll at uplex.de>
Date:   Tue Jun 5 08:04:14 2018 +0200

    Statistics for backend open errors
    
    Previously, we had no statistics on the cause of backend connection
    errors, which made it close to impossible to diagnose such issues in
    retrospect (other than by mining the logs).
    
    We now pass an optional backend vsc to vcp and record errors per
    backend.
    
    Open errors are really per vcp entry (ip + port or uds path), which
    can be shared amongst backends (and even vcls), but we maintain the
    counters per backend (and, consequently, per vcl) for simplicity.  It
    should be noted though that errors for shared endpoints affect all
    backends using them.
    
    Ref #2622

diff --git a/bin/varnishd/VSC_vbe.vsc b/bin/varnishd/VSC_vbe.vsc
index c5ca37f..b215909 100644
--- a/bin/varnishd/VSC_vbe.vsc
+++ b/bin/varnishd/VSC_vbe.vsc
@@ -78,5 +78,50 @@
 	:level:	info
 	:oneliner:	Backend requests sent
 
-.. varnish_vsc_end::	vbe
+..
+	=== Anything below is actually per VCP entry, but collected per
+	=== backend for simplicity
+
+.. varnish_vsc:: fail
+	:type:	counter
+	:level: info
+	:oneliner:	Connections failed
+
+	Counter of failed opens. Detailed reasons are given in the
+	fail_* counters (DIAG level) and in Debug VSL.
+
+	This counter is the sum of all detailed fail_* counters.
+
+	All fail_* counters may be slightly inaccurate for efficiency.
+
+.. varnish_vsc:: fail_eacces
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed with EACCES or EPERM
+
+.. varnish_vsc:: fail_eaddrnotavail
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed with EADDRNOTAVAIL
+
+.. varnish_vsc:: fail_econnrefused
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed with ECONNREFUSED
+
+.. varnish_vsc:: fail_enetunreach
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed with ENETUNREACH
 
+.. varnish_vsc:: fail_etimedout
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed with ETIMEDOUT
+
+.. varnish_vsc:: fail_other
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed for other reason
+
+.. varnish_vsc_end::	vbe
diff --git a/bin/varnishd/cache/cache_backend.c b/bin/varnishd/cache/cache_backend.c
index 0f089f0..4513474 100644
--- a/bin/varnishd/cache/cache_backend.c
+++ b/bin/varnishd/cache/cache_backend.c
@@ -113,12 +113,11 @@ vbe_dir_getfd(struct worker *wrk, struct backend *bp, struct busyobj *bo,
 	bo->htc->doclose = SC_NULL;
 
 	FIND_TMO(connect_timeout, tmod, bo, bp);
-	pfd = VTP_Get(bp->tcp_pool, tmod, wrk, force_fresh);
+	pfd = VTP_Get(bp->tcp_pool, tmod, wrk, force_fresh, bp->vsc);
 	if (pfd == NULL) {
 		VSLb(bo->vsl, SLT_FetchError,
 		     "backend %s: fail errno %d (%s)",
 		     VRT_BACKEND_string(bp->director), errno, strerror(errno));
-		// XXX: Per backend stats ?
 		VSC_C_main->backend_fail++;
 		bo->htc = NULL;
 		return (NULL);
diff --git a/bin/varnishd/cache/cache_backend_probe.c b/bin/varnishd/cache/cache_backend_probe.c
index e9f886e..2c87327 100644
--- a/bin/varnishd/cache/cache_backend_probe.c
+++ b/bin/varnishd/cache/cache_backend_probe.c
@@ -272,7 +272,8 @@ vbp_poke(struct vbp_target *vt)
 	t_start = t_now = VTIM_real();
 	t_end = t_start + vt->timeout;
 
-	s = VTP_Open(vt->tcp_pool, t_end - t_now, (const void **)&sa);
+	s = VTP_Open(vt->tcp_pool, t_end - t_now, (const void **)&sa,
+		vt->backend->vsc);
 	if (s < 0) {
 		/* Got no connection: failed */
 		return;
diff --git a/bin/varnishd/cache/cache_tcp_pool.c b/bin/varnishd/cache/cache_tcp_pool.c
index c92ba8c..f5fa2a3 100644
--- a/bin/varnishd/cache/cache_tcp_pool.c
+++ b/bin/varnishd/cache/cache_tcp_pool.c
@@ -45,6 +45,8 @@
 #include "cache_tcp_pool.h"
 #include "cache_pool.h"
 
+#include "VSC_vbe.h"
+
 struct conn_pool;
 
 /*--------------------------------------------------------------------
@@ -373,7 +375,8 @@ VCP_Recycle(const struct worker *wrk, struct pfd **pfdp)
  */
 
 static int
-VCP_Open(const struct conn_pool *cp, double tmo, const void **privp)
+VCP_Open(const struct conn_pool *cp, double tmo, const void **privp,
+    struct VSC_vbe *vsc)
 {
 	int r;
 
@@ -381,6 +384,32 @@ VCP_Open(const struct conn_pool *cp, double tmo, const void **privp)
 
 	r = cp->methods->open(cp, tmo, privp);
 
+	if (r >= 0 || vsc == NULL)
+		return (r);
+
+	/* stats access unprotected */
+	switch (errno) {
+	case EACCES:
+	case EPERM:
+		vsc->fail_eacces++;
+		break;
+	case EADDRNOTAVAIL:
+		vsc->fail_eaddrnotavail++;
+		break;
+	case ECONNREFUSED:
+		vsc->fail_econnrefused++;
+		break;
+	case ENETUNREACH:
+		vsc->fail_enetunreach++;
+		break;
+	case ETIMEDOUT:
+		vsc->fail_etimedout++;
+		break;
+	default:
+		vsc->fail_other++;
+	}
+	vsc->fail++;
+
 	return (r);
 }
 
@@ -426,7 +455,7 @@ VCP_Close(struct pfd **pfdp)
 
 static struct pfd *
 VCP_Get(struct conn_pool *cp, double tmo, struct worker *wrk,
-    unsigned force_fresh)
+    unsigned force_fresh, struct VSC_vbe *vsc)
 {
 	struct pfd *pfd;
 
@@ -459,7 +488,7 @@ VCP_Get(struct conn_pool *cp, double tmo, struct worker *wrk,
 	INIT_OBJ(pfd->waited, WAITED_MAGIC);
 	pfd->state = PFD_STATE_USED;
 	pfd->conn_pool = cp;
-	pfd->fd = VCP_Open(cp, tmo, &pfd->priv);
+	pfd->fd = VCP_Open(cp, tmo, &pfd->priv, vsc);
 	if (pfd->fd < 0) {
 		FREE_OBJ(pfd);
 		Lck_Lock(&cp->mtx);
@@ -737,9 +766,10 @@ VTP_Rel(struct tcp_pool **tpp)
  */
 
 int
-VTP_Open(const struct tcp_pool *tp, double tmo, const void **privp)
+VTP_Open(const struct tcp_pool *tp, double tmo, const void **privp,
+    struct VSC_vbe *vsc)
 {
-	return (VCP_Open(tp->cp, tmo, privp));
+	return (VCP_Open(tp->cp, tmo, privp, vsc));
 }
 
 /*--------------------------------------------------------------------
@@ -770,10 +800,10 @@ VTP_Close(struct pfd **pfdp)
 
 struct pfd *
 VTP_Get(struct tcp_pool *tp, double tmo, struct worker *wrk,
-    unsigned force_fresh)
+	unsigned force_fresh, struct VSC_vbe *vsc)
 {
 
-	return VCP_Get(tp->cp, tmo, wrk, force_fresh);
+	return VCP_Get(tp->cp, tmo, wrk, force_fresh, vsc);
 }
 
 /*--------------------------------------------------------------------
diff --git a/bin/varnishd/cache/cache_tcp_pool.h b/bin/varnishd/cache/cache_tcp_pool.h
index d554a36..88b131e 100644
--- a/bin/varnishd/cache/cache_tcp_pool.h
+++ b/bin/varnishd/cache/cache_tcp_pool.h
@@ -50,6 +50,8 @@ void PFD_RemoteName(const struct pfd *, char *, unsigned, char *, unsigned);
  * Prototypes
  */
 
+struct VSC_vbe;
+
 struct tcp_pool *VTP_Ref(const struct suckaddr *ip4, const struct suckaddr *ip6,
     const char *uds, const void *id);
 	/*
@@ -70,9 +72,11 @@ void VTP_Rel(struct tcp_pool **);
 	 * the pool is destroyed and all cached connections closed.
 	 */
 
-int VTP_Open(const struct tcp_pool *, double tmo, const void **);
+int VTP_Open(const struct tcp_pool *, double tmo, const void **,
+    struct VSC_vbe *);
 	/*
 	 * Open a new connection and return the adress used.
+	 * Errors will be accounted in the optional vsc
 	 */
 
 void VTP_Close(struct pfd **);
@@ -86,9 +90,10 @@ void VTP_Recycle(const struct worker *, struct pfd **);
 	 */
 
 struct pfd *VTP_Get(struct tcp_pool *, double tmo, struct worker *,
-    unsigned force_fresh);
+    unsigned force_fresh, struct VSC_vbe *);
 	/*
 	 * Get a (possibly) recycled connection.
+	 * Open errors will be accounted in the optional vsc
 	 */
 
 int VTP_Wait(struct worker *, struct pfd *, double tmo);


More information about the varnish-commit mailing list