[6.0] 4a0dcf40e Statistics for backend open errors

Dridi Boukelmoune dridi.boukelmoune at gmail.com
Thu Aug 16 08:53:11 UTC 2018


commit 4a0dcf40e6813f1a569464315b8ad29469b607c9
Author: Nils Goroll <nils.goroll at uplex.de>
Date:   Tue Jun 5 08:04:14 2018 +0200

    Statistics for backend open errors
    
    Previously, we had zero stats on the cause of backend connection
    errors, which made it close to impossible to diagnose such issues in
    retrospect (only via log mining).
    
    We now pass an optional backend vsc to vcp and record errors per
    backend.
    
    Open errors are really per vcp entry (ip + port or uds path), which
    can be shared amongst backends (and even vcls), but we maintain the
    counters per backend (and, consequently, per vcl) for simplicity.  It
    should be noted though that errors for shared endpoints affect all
    backends using them.
    
    Ref #2622
    
    Conflicts:
            bin/varnishd/cache/cache_backend.c

diff --git a/bin/varnishd/VSC_vbe.vsc b/bin/varnishd/VSC_vbe.vsc
index c5ca37f16..b21590965 100644
--- a/bin/varnishd/VSC_vbe.vsc
+++ b/bin/varnishd/VSC_vbe.vsc
@@ -78,5 +78,50 @@
 	:level:	info
 	:oneliner:	Backend requests sent
 
-.. varnish_vsc_end::	vbe
+..
+	=== Anything below is actually per VCP entry, but collected per
+	=== backend for simplicity
+
+.. varnish_vsc:: fail
+	:type:	counter
+	:level: info
+	:oneliner:	Connections failed
+
+	Counter of failed opens. Detailed reasons are given in the
+	fail_* counters (DIAG level) and in Debug VSL.
+
+	This counter is the sum of all detailed fail_* counters.
+
+	All fail_* counters may be slightly inaccurate for efficiency.
+
+.. varnish_vsc:: fail_eacces
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed with EACCES or EPERM
+
+.. varnish_vsc:: fail_eaddrnotavail
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed with EADDRNOTAVAIL
+
+.. varnish_vsc:: fail_econnrefused
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed with ECONNREFUSED
+
+.. varnish_vsc:: fail_enetunreach
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed with ENETUNREACH
 
+.. varnish_vsc:: fail_etimedout
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed with ETIMEDOUT
+
+.. varnish_vsc:: fail_other
+	:type:	counter
+	:level: diag
+	:oneliner:	Connections failed for other reason
+
+.. varnish_vsc_end::	vbe
diff --git a/bin/varnishd/cache/cache_backend.c b/bin/varnishd/cache/cache_backend.c
index 610132a62..11856b8ca 100644
--- a/bin/varnishd/cache/cache_backend.c
+++ b/bin/varnishd/cache/cache_backend.c
@@ -114,7 +114,7 @@ vbe_dir_getfd(struct worker *wrk, struct backend *bp, struct busyobj *bo,
 	bo->htc->doclose = SC_NULL;
 
 	FIND_TMO(connect_timeout, tmod, bo, bp);
-	pfd = VTP_Get(bp->tcp_pool, tmod, wrk, force_fresh);
+	pfd = VTP_Get(bp->tcp_pool, tmod, wrk, force_fresh, bp->vsc);
 	if (pfd == NULL) {
 		VSLb(bo->vsl, SLT_FetchError,
 		     "backend %s: fail errno %d (%s)",
diff --git a/bin/varnishd/cache/cache_backend_probe.c b/bin/varnishd/cache/cache_backend_probe.c
index 276391dba..b45eed1f6 100644
--- a/bin/varnishd/cache/cache_backend_probe.c
+++ b/bin/varnishd/cache/cache_backend_probe.c
@@ -276,7 +276,8 @@ vbp_poke(struct vbp_target *vt)
 	t_start = t_now = VTIM_real();
 	t_end = t_start + vt->timeout;
 
-	s = VTP_Open(vt->tcp_pool, t_end - t_now, (const void **)&sa);
+	s = VTP_Open(vt->tcp_pool, t_end - t_now, (const void **)&sa,
+		vt->backend->vsc);
 	if (s < 0) {
 		/* Got no connection: failed */
 		return;
diff --git a/bin/varnishd/cache/cache_tcp_pool.c b/bin/varnishd/cache/cache_tcp_pool.c
index c92ba8cbb..f5fa2a365 100644
--- a/bin/varnishd/cache/cache_tcp_pool.c
+++ b/bin/varnishd/cache/cache_tcp_pool.c
@@ -45,6 +45,8 @@
 #include "cache_tcp_pool.h"
 #include "cache_pool.h"
 
+#include "VSC_vbe.h"
+
 struct conn_pool;
 
 /*--------------------------------------------------------------------
@@ -373,7 +375,8 @@ VCP_Recycle(const struct worker *wrk, struct pfd **pfdp)
  */
 
 static int
-VCP_Open(const struct conn_pool *cp, double tmo, const void **privp)
+VCP_Open(const struct conn_pool *cp, double tmo, const void **privp,
+    struct VSC_vbe *vsc)
 {
 	int r;
 
@@ -381,6 +384,32 @@ VCP_Open(const struct conn_pool *cp, double tmo, const void **privp)
 
 	r = cp->methods->open(cp, tmo, privp);
 
+	if (r >= 0 || vsc == NULL)
+		return (r);
+
+	/* stats access unprotected */
+	switch (errno) {
+	case EACCES:
+	case EPERM:
+		vsc->fail_eacces++;
+		break;
+	case EADDRNOTAVAIL:
+		vsc->fail_eaddrnotavail++;
+		break;
+	case ECONNREFUSED:
+		vsc->fail_econnrefused++;
+		break;
+	case ENETUNREACH:
+		vsc->fail_enetunreach++;
+		break;
+	case ETIMEDOUT:
+		vsc->fail_etimedout++;
+		break;
+	default:
+		vsc->fail_other++;
+	}
+	vsc->fail++;
+
 	return (r);
 }
 
@@ -426,7 +455,7 @@ VCP_Close(struct pfd **pfdp)
 
 static struct pfd *
 VCP_Get(struct conn_pool *cp, double tmo, struct worker *wrk,
-    unsigned force_fresh)
+    unsigned force_fresh, struct VSC_vbe *vsc)
 {
 	struct pfd *pfd;
 
@@ -459,7 +488,7 @@ VCP_Get(struct conn_pool *cp, double tmo, struct worker *wrk,
 	INIT_OBJ(pfd->waited, WAITED_MAGIC);
 	pfd->state = PFD_STATE_USED;
 	pfd->conn_pool = cp;
-	pfd->fd = VCP_Open(cp, tmo, &pfd->priv);
+	pfd->fd = VCP_Open(cp, tmo, &pfd->priv, vsc);
 	if (pfd->fd < 0) {
 		FREE_OBJ(pfd);
 		Lck_Lock(&cp->mtx);
@@ -737,9 +766,10 @@ VTP_Rel(struct tcp_pool **tpp)
  */
 
 int
-VTP_Open(const struct tcp_pool *tp, double tmo, const void **privp)
+VTP_Open(const struct tcp_pool *tp, double tmo, const void **privp,
+    struct VSC_vbe *vsc)
 {
-	return (VCP_Open(tp->cp, tmo, privp));
+	return (VCP_Open(tp->cp, tmo, privp, vsc));
 }
 
 /*--------------------------------------------------------------------
@@ -770,10 +800,10 @@ VTP_Close(struct pfd **pfdp)
 
 struct pfd *
 VTP_Get(struct tcp_pool *tp, double tmo, struct worker *wrk,
-    unsigned force_fresh)
+	unsigned force_fresh, struct VSC_vbe *vsc)
 {
 
-	return VCP_Get(tp->cp, tmo, wrk, force_fresh);
+	return VCP_Get(tp->cp, tmo, wrk, force_fresh, vsc);
 }
 
 /*--------------------------------------------------------------------
diff --git a/bin/varnishd/cache/cache_tcp_pool.h b/bin/varnishd/cache/cache_tcp_pool.h
index d554a360f..88b131e9a 100644
--- a/bin/varnishd/cache/cache_tcp_pool.h
+++ b/bin/varnishd/cache/cache_tcp_pool.h
@@ -50,6 +50,8 @@ void PFD_RemoteName(const struct pfd *, char *, unsigned, char *, unsigned);
  * Prototypes
  */
 
+struct VSC_vbe;
+
 struct tcp_pool *VTP_Ref(const struct suckaddr *ip4, const struct suckaddr *ip6,
     const char *uds, const void *id);
 	/*
@@ -70,9 +72,11 @@ void VTP_Rel(struct tcp_pool **);
 	 * the pool is destroyed and all cached connections closed.
 	 */
 
-int VTP_Open(const struct tcp_pool *, double tmo, const void **);
+int VTP_Open(const struct tcp_pool *, double tmo, const void **,
+    struct VSC_vbe *);
 	/*
 	 * Open a new connection and return the adress used.
+	 * Errors will be accounted in the optional vsc
 	 */
 
 void VTP_Close(struct pfd **);
@@ -86,9 +90,10 @@ void VTP_Recycle(const struct worker *, struct pfd **);
 	 */
 
 struct pfd *VTP_Get(struct tcp_pool *, double tmo, struct worker *,
-    unsigned force_fresh);
+    unsigned force_fresh, struct VSC_vbe *);
 	/*
 	 * Get a (possibly) recycled connection.
+	 * Open errors will be accounted in the optional vsc
 	 */
 
 int VTP_Wait(struct worker *, struct pfd *, double tmo);


More information about the varnish-commit mailing list