[6.0] 2dfd573fa Accurate ban statistics except for a few remaining corner cases

Dridi Boukelmoune dridi.boukelmoune at gmail.com
Thu Aug 16 08:53:15 UTC 2018


commit 2dfd573fae79ee4f8bd48ceed45dcc69720234dc
Author: Nils Goroll <nils.goroll at uplex.de>
Date:   Wed Jun 20 13:39:09 2018 +0200

    Accurate ban statistics except for a few remaining corner cases
    
    For ban statistics, we updated VSC_C_main directly, so if we raced
    Pool_Sumstat(), that could undo our changes.
    
    This patch fixes statistics by using the per-worker statistics
    cache except for the following remaining corner cases:
    
    * bans_persisted_* counters receive absolut updates, which does
      not seem to fit the incremental updates via the per-worker stats.
    
      I've kept these cases untouched and marked with comments. Worst
      that should happen here are temporary inconsistencies until the
      next absolute update.
    
    * For BAN_Reload(), my understanding is that it should only
      happen during init, so we continue to update VSC_C_main
      directly.
    
    * For bans via the cli, we would need to grab the wstat lock,
      which, at the moment, is private to the worker implementation.
    
      Until we make a change here, we could miss a ban increment
      from the cli.
    
    * for VCL bans from vcl_init / fini, we do not have access
      to the worker struct at the moment, so for now we also
      accept an inconsistency here.
    
    Fixes #2716 for relevant cases

diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h
index f349d4f54..2298feaad 100644
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@ -570,7 +570,7 @@ struct sess {
 struct ban_proto *BAN_Build(void);
 const char *BAN_AddTest(struct ban_proto *,
     const char *, const char *, const char *);
-const char *BAN_Commit(struct ban_proto *b);
+const char *BAN_Commit(struct ban_proto *b, struct VSC_main *stats);
 void BAN_Abandon(struct ban_proto *b);
 
 /* cache_cli.c [CLI] */
diff --git a/bin/varnishd/cache/cache_ban.c b/bin/varnishd/cache/cache_ban.c
index 0393deea6..b3203a042 100644
--- a/bin/varnishd/cache/cache_ban.c
+++ b/bin/varnishd/cache/cache_ban.c
@@ -152,7 +152,7 @@ ban_equal(const uint8_t *bs1, const uint8_t *bs2)
 }
 
 void
-ban_mark_completed(struct ban *b)
+ban_mark_completed(struct ban *b, struct VSC_main *stats)
 {
 	unsigned ln;
 
@@ -166,8 +166,12 @@ ban_mark_completed(struct ban *b)
 		b->spec[BANS_FLAGS] |= BANS_FLAG_COMPLETED;
 		VWMB();
 		vbe32enc(b->spec + BANS_LENGTH, BANS_HEAD_LEN);
-		VSC_C_main->bans_completed++;
+		stats->bans_completed++;
 		bans_persisted_fragmentation += ln - ban_len(b->spec);
+		/*
+		 * XXX absolute update of gauges - may be inaccurate for
+		 * Pool_Sumstat race
+		 */
 		VSC_C_main->bans_persisted_fragmentation =
 		    bans_persisted_fragmentation;
 	}
@@ -315,6 +319,10 @@ ban_export(void)
 	assert(VSB_len(vsb) == ln);
 	STV_BanExport((const uint8_t *)VSB_data(vsb), VSB_len(vsb));
 	VSB_destroy(&vsb);
+	/*
+	 * XXX absolute update of gauges - may be inaccurate for Pool_Sumstat
+	 * race
+	 */
 	VSC_C_main->bans_persisted_bytes =
 	    bans_persisted_bytes = ln;
 	VSC_C_main->bans_persisted_fragmentation =
@@ -359,7 +367,7 @@ ban_reload(const uint8_t *ban, unsigned len)
 	struct ban *b, *b2;
 	int duplicate = 0;
 	double t0, t1, t2 = 9e99;
-
+	struct VSC_main *stats = VSC_C_main;	// XXX accurate?
 	ASSERT_CLI();
 	Lck_AssertHeld(&ban_mtx);
 
@@ -378,8 +386,8 @@ ban_reload(const uint8_t *ban, unsigned len)
 			duplicate = 1;
 	}
 
-	VSC_C_main->bans++;
-	VSC_C_main->bans_added++;
+	stats->bans++;
+	stats->bans_added++;
 
 	b2 = ban_alloc();
 	AN(b2);
@@ -387,18 +395,22 @@ ban_reload(const uint8_t *ban, unsigned len)
 	AN(b2->spec);
 	memcpy(b2->spec, ban, len);
 	if (ban[BANS_FLAGS] & BANS_FLAG_REQ) {
-		VSC_C_main->bans_req++;
+		stats->bans_req++;
 		b2->flags |= BANS_FLAG_REQ;
 	}
 	if (duplicate)
-		VSC_C_main->bans_dups++;
+		stats->bans_dups++;
 	if (duplicate || (ban[BANS_FLAGS] & BANS_FLAG_COMPLETED))
-		ban_mark_completed(b2);
+		ban_mark_completed(b2, stats);
 	if (b == NULL)
 		VTAILQ_INSERT_TAIL(&ban_head, b2, list);
 	else
 		VTAILQ_INSERT_BEFORE(b, b2, list);
 	bans_persisted_bytes += len;
+	/*
+	 * XXX absolute update of gauges - may be inaccurate for Pool_Sumstat
+	 * race
+	 */
 	VSC_C_main->bans_persisted_bytes = bans_persisted_bytes;
 
 	/* Hunt down older duplicates */
@@ -406,8 +418,8 @@ ban_reload(const uint8_t *ban, unsigned len)
 		if (b->flags & BANS_FLAG_COMPLETED)
 			continue;
 		if (ban_equal(b->spec, ban)) {
-			ban_mark_completed(b);
-			VSC_C_main->bans_dups++;
+			ban_mark_completed(b, stats);
+			stats->bans_dups++;
 		}
 	}
 }
@@ -536,13 +548,16 @@ BAN_CheckObject(struct worker *wrk, struct objcore *oc, struct req *req)
 	struct vsl_log *vsl;
 	struct ban *b0, *bn;
 	unsigned tests;
+	struct VSC_main *stats;
 
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
 	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
 	Lck_AssertHeld(&oc->objhead->mtx);
 	assert(oc->refcnt > 0);
 
 	vsl = req->vsl;
+	stats = wrk->stats;
 
 	CHECK_OBJ_NOTNULL(oc->ban, BAN_MAGIC);
 
@@ -585,8 +600,8 @@ BAN_CheckObject(struct worker *wrk, struct objcore *oc, struct req *req)
 
 	Lck_Lock(&ban_mtx);
 	bn->refcount--;
-	VSC_C_main->bans_tested++;
-	VSC_C_main->bans_tests_tested += tests;
+	stats->bans_tested++;
+	stats->bans_tests_tested += tests;
 
 	if (b == bn) {
 		/* not banned */
@@ -609,7 +624,7 @@ BAN_CheckObject(struct worker *wrk, struct objcore *oc, struct req *req)
 		return (0);
 	} else {
 		VSLb(vsl, SLT_ExpBan, "%u banned lookup", ObjGetXID(wrk, oc));
-		VSC_C_main->bans_obj_killed++;
+		stats->bans_obj_killed++;
 		return (1);
 	}
 }
@@ -655,8 +670,10 @@ ccf_ban(struct cli *cli, const char * const *av, void *priv)
 			break;
 	}
 
-	if (err == NULL)
-		err = BAN_Commit(bp);
+	if (err == NULL) {
+		// XXX racy - grab wstat lock?
+		err = BAN_Commit(bp, VSC_C_main);
+	}
 
 	if (err != NULL) {
 		VCLI_Out(cli, "%s", err);
@@ -804,9 +821,9 @@ BAN_Init(void)
 	bp = BAN_Build();
 	AN(bp);
 	AZ(pthread_cond_init(&ban_lurker_cond, NULL));
-	AZ(BAN_Commit(bp));
+	AZ(BAN_Commit(bp, VSC_C_main));
 	Lck_Lock(&ban_mtx);
-	ban_mark_completed(VTAILQ_FIRST(&ban_head));
+	ban_mark_completed(VTAILQ_FIRST(&ban_head), VSC_C_main);
 	Lck_Unlock(&ban_mtx);
 }
 
diff --git a/bin/varnishd/cache/cache_ban.h b/bin/varnishd/cache/cache_ban.h
index 6267c4eed..d534f6e99 100644
--- a/bin/varnishd/cache/cache_ban.h
+++ b/bin/varnishd/cache/cache_ban.h
@@ -109,7 +109,7 @@ extern pthread_cond_t	ban_lurker_cond;
 extern uint64_t bans_persisted_bytes;
 extern uint64_t bans_persisted_fragmentation;
 
-void ban_mark_completed(struct ban *b);
+void ban_mark_completed(struct ban *b, struct VSC_main *stats);
 unsigned ban_len(const uint8_t *banspec);
 void ban_info_new(const uint8_t *ban, unsigned len);
 void ban_info_drop(const uint8_t *ban, unsigned len);
diff --git a/bin/varnishd/cache/cache_ban_build.c b/bin/varnishd/cache/cache_ban_build.c
index 717f5c506..95f82bcc8 100644
--- a/bin/varnishd/cache/cache_ban_build.c
+++ b/bin/varnishd/cache/cache_ban_build.c
@@ -246,7 +246,7 @@ BAN_AddTest(struct ban_proto *bp,
  */
 
 const char *
-BAN_Commit(struct ban_proto *bp)
+BAN_Commit(struct ban_proto *bp, struct VSC_main *stats)
 {
 	struct ban  *b, *bi;
 	ssize_t ln;
@@ -294,15 +294,19 @@ BAN_Commit(struct ban_proto *bp)
 	VTAILQ_INSERT_HEAD(&ban_head, b, list);
 	ban_start = b;
 
-	VSC_C_main->bans++;
-	VSC_C_main->bans_added++;
+	stats->bans++;
+	stats->bans_added++;
 	bans_persisted_bytes += ln;
+	/*
+	 * XXX absolute update of gauges - may be inaccurate for Pool_Sumstat
+	 * race
+	 */
 	VSC_C_main->bans_persisted_bytes = bans_persisted_bytes;
 
 	if (b->flags & BANS_FLAG_OBJ)
-		VSC_C_main->bans_obj++;
+		stats->bans_obj++;
 	if (b->flags & BANS_FLAG_REQ)
-		VSC_C_main->bans_req++;
+		stats->bans_req++;
 
 	if (bi != NULL)
 		ban_info_new(b->spec, ln);	/* Notify stevedores */
@@ -313,8 +317,8 @@ BAN_Commit(struct ban_proto *bp)
 		    bi = VTAILQ_NEXT(bi, list)) {
 			if (!(bi->flags & BANS_FLAG_COMPLETED) &&
 			    ban_equal(b->spec, bi->spec)) {
-				ban_mark_completed(bi);
-				VSC_C_main->bans_dups++;
+				ban_mark_completed(bi, stats);
+				stats->bans_dups++;
 			}
 		}
 	}
diff --git a/bin/varnishd/cache/cache_ban_lurker.c b/bin/varnishd/cache/cache_ban_lurker.c
index f59b888ce..343009dd5 100644
--- a/bin/varnishd/cache/cache_ban_lurker.c
+++ b/bin/varnishd/cache/cache_ban_lurker.c
@@ -62,7 +62,7 @@ ban_kick_lurker(void)
  */
 
 static int
-ban_cleantail(const struct ban *victim)
+ban_cleantail(const struct ban *victim, struct VSC_main *stats)
 {
 	struct ban *b, *bt;
 	struct banhead_s freelist = VTAILQ_HEAD_INITIALIZER(freelist);
@@ -78,17 +78,21 @@ ban_cleantail(const struct ban *victim)
 		if (b != VTAILQ_FIRST(&ban_head) && b->refcount == 0) {
 			assert(VTAILQ_EMPTY(&b->objcore));
 			if (b->flags & BANS_FLAG_COMPLETED)
-				VSC_C_main->bans_completed--;
+				stats->bans_completed--;
 			if (b->flags & BANS_FLAG_OBJ)
-				VSC_C_main->bans_obj--;
+				stats->bans_obj--;
 			if (b->flags & BANS_FLAG_REQ)
-				VSC_C_main->bans_req--;
-			VSC_C_main->bans--;
-			VSC_C_main->bans_deleted++;
+				stats->bans_req--;
+			stats->bans--;
+			stats->bans_deleted++;
 			VTAILQ_REMOVE(&ban_head, b, list);
 			VTAILQ_INSERT_TAIL(&freelist, b, list);
 			bans_persisted_fragmentation +=
 			    ban_len(b->spec);
+			/*
+			 * XXX absolute update of gauges - may be inaccurate for
+			 * Pool_Sumstat race
+			 */
 			VSC_C_main->bans_persisted_fragmentation =
 			    bans_persisted_fragmentation;
 			ban_info_drop(b->spec, ban_len(b->spec));
@@ -121,7 +125,7 @@ ban_cleantail(const struct ban *victim)
  */
 
 static struct objcore *
-ban_lurker_getfirst(struct vsl_log *vsl, struct ban *bt)
+ban_lurker_getfirst(struct vsl_log *vsl, struct ban *bt, struct VSC_main *stats)
 {
 	struct objhead *oh;
 	struct objcore *oc, *noc;
@@ -151,7 +155,7 @@ ban_lurker_getfirst(struct vsl_log *vsl, struct ban *bt)
 
 			/* hold off to give lookup a chance and reiterate */
 			Lck_Unlock(&ban_mtx);
-			VSC_C_main->bans_lurker_contention++;
+			stats->bans_lurker_contention++;
 			VSL_Flush(vsl, 0);
 			VTIM_sleep(cache_param->ban_lurker_holdoff);
 			Lck_Lock(&ban_mtx);
@@ -205,6 +209,10 @@ ban_lurker_test_ban(struct worker *wrk, struct vsl_log *vsl, struct ban *bt,
 	struct objcore *oc;
 	unsigned tests;
 	int i;
+	struct VSC_main *stats;
+
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
+	stats = wrk->stats;
 
 	/*
 	 * First see if there is anything to do, and if so, insert markers
@@ -224,7 +232,7 @@ ban_lurker_test_ban(struct worker *wrk, struct vsl_log *vsl, struct ban *bt,
 			VTIM_sleep(cache_param->ban_lurker_sleep);
 			ban_batch = 0;
 		}
-		oc = ban_lurker_getfirst(vsl, bt);
+		oc = ban_lurker_getfirst(vsl, bt, stats);
 		if (oc == NULL)
 			return;
 		i = 0;
@@ -248,21 +256,21 @@ ban_lurker_test_ban(struct worker *wrk, struct vsl_log *vsl, struct ban *bt,
 				tests = 0;
 				i = ban_evaluate(wrk, bl->spec, oc, NULL,
 				    &tests);
-				VSC_C_main->bans_lurker_tested++;
-				VSC_C_main->bans_lurker_tests_tested += tests;
+				stats->bans_lurker_tested++;
+				stats->bans_lurker_tests_tested += tests;
 			}
 			if (i) {
 				if (kill) {
 					VSLb(vsl, SLT_ExpBan,
 					    "%u killed for lurker cutoff",
 					    ObjGetXID(wrk, oc));
-					VSC_C_main->
+					stats->
 					    bans_lurker_obj_killed_cutoff++;
 				} else {
 					VSLb(vsl, SLT_ExpBan,
 					    "%u banned by lurker",
 					    ObjGetXID(wrk, oc));
-					VSC_C_main->bans_lurker_obj_killed++;
+					stats->bans_lurker_obj_killed++;
 				}
 				HSH_Kill(oc);
 				break;
@@ -303,10 +311,14 @@ ban_lurker_work(struct worker *wrk, struct vsl_log *vsl)
 	struct banhead_s obans;
 	double d, dt, n;
 	unsigned count = 0, cutoff = UINT_MAX;
+	struct VSC_main *stats;
+
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
+	stats = wrk->stats;
 
 	dt = 49.62;		// Random, non-magic
 	if (cache_param->ban_lurker_sleep == 0) {
-		(void)ban_cleantail(NULL);
+		(void)ban_cleantail(NULL, stats);
 		return (dt);
 	}
 	if (cache_param->ban_cutoff > 0)
@@ -346,7 +358,7 @@ ban_lurker_work(struct worker *wrk, struct vsl_log *vsl)
 	 * containted the first oban, all obans were on the tail and we're
 	 * done.
 	 */
-	if (ban_cleantail(VTAILQ_FIRST(&obans)))
+	if (ban_cleantail(VTAILQ_FIRST(&obans), stats))
 		return (dt);
 
 	if (VTAILQ_FIRST(&obans) == NULL)
@@ -374,7 +386,7 @@ ban_lurker_work(struct worker *wrk, struct vsl_log *vsl)
 
 	Lck_Lock(&ban_mtx);
 	VTAILQ_FOREACH(b, &obans, l_list) {
-		ban_mark_completed(b);
+		ban_mark_completed(b, stats);
 		if (b == bd)
 			break;
 	}
diff --git a/bin/varnishd/cache/cache_vrt.c b/bin/varnishd/cache/cache_vrt.c
index 8a31ee157..c4c709aa7 100644
--- a/bin/varnishd/cache/cache_vrt.c
+++ b/bin/varnishd/cache/cache_vrt.c
@@ -558,6 +558,26 @@ VRT_synth_page(VRT_CTX, const char *str, ...)
 
 /*--------------------------------------------------------------------*/
 
+static struct VSC_main *
+vrt_stats(VRT_CTX)
+{
+	struct worker *wrk;
+
+	if (ctx->req) {
+		CHECK_OBJ_NOTNULL(ctx->req, REQ_MAGIC);
+		wrk = ctx->req->wrk;
+	} else if (ctx->bo) {
+		CHECK_OBJ_NOTNULL(ctx->bo, BUSYOBJ_MAGIC);
+		wrk = ctx->bo->wrk;
+	} else {
+		// XXX
+		return (VSC_C_main);
+	}
+
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
+	return (wrk->stats);
+}
+
 VCL_VOID
 VRT_ban_string(VRT_CTX, VCL_STRING str)
 {
@@ -613,7 +633,7 @@ VRT_ban_string(VRT_CTX, VCL_STRING str)
 			break;
 		}
 		if (av[++i] == NULL) {
-			err = BAN_Commit(bp);
+			err = BAN_Commit(bp, vrt_stats(ctx));
 			if (err == NULL)
 				bp = NULL;
 			else
diff --git a/include/vrt.h b/include/vrt.h
index fdc454bba..936aa207f 100644
--- a/include/vrt.h
+++ b/include/vrt.h
@@ -123,6 +123,7 @@ struct vsc_seg;
 struct vsmw_cluster;
 struct vsl_log;
 struct ws;
+struct VSC_main;
 
 struct strands {
 	int		n;


More information about the varnish-commit mailing list