[6.0] 4db967e77 Add a watchdog to worker pools.

Wed Oct 31 13:08:26 UTC 2018

commit 4db967e77847dbeac0acd67dea65518ec9230ea4
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date:   Wed Oct 3 10:12:59 2018 +0000

    Add a watchdog to worker pools.
    
    If the worker pool is configured too small, it can deadlock.
    
    Recovering from this would require a lot of complicated code, to
    discover queued but unscheduled tasks which can be cancelled
    (because the client went away or otherwise) and code to do the
    cancelling etc. etc.
    
    But fundamentally either people configured their pools wrong, in
    which case we want them to notice, or they are under DoS, in which
    case recovering gracefully is unlikely be a major improvement over
    a restart.
    
    Instead we implement a per-pool watchdog and kill the child process
    if nothing has been dequeued for too long.
    
    Default value 10 seconds, open to discussion.
    
    Band-aid for:   #2418
    
    Test-case by:   @Dridi

diff --git a/bin/varnishd/cache/cache_pool.h b/bin/varnishd/cache/cache_pool.h
index e7a6a618b..9902e32ad 100644
--- a/bin/varnishd/cache/cache_pool.h
+++ b/bin/varnishd/cache/cache_pool.h
@@ -53,6 +53,7 @@ struct pool {
 	uintmax_t			sdropped;
 	uintmax_t			rdropped;
 	uintmax_t			nqueued;
+	uintmax_t			ndequeued;
 	struct VSC_main_wrk		*a_stat;
 	struct VSC_main_wrk		*b_stat;
 
diff --git a/bin/varnishd/cache/cache_wrk.c b/bin/varnishd/cache/cache_wrk.c
index 589ce68cb..f7f95cfc5 100644
--- a/bin/varnishd/cache/cache_wrk.c
+++ b/bin/varnishd/cache/cache_wrk.c
@@ -341,6 +341,7 @@ Pool_Work_Thread(struct pool *pp, struct worker *wrk)
 			tp = VTAILQ_FIRST(&pp->queues[i]);
 			if (tp != NULL) {
 				pp->lqueue--;
+				pp->ndequeued--;
 				VTAILQ_REMOVE(&pp->queues[i], tp, list);
 				break;
 			}
@@ -487,6 +488,8 @@ pool_herder(void *priv)
 	struct worker *wrk;
 	double delay;
 	int wthread_min;
+	uintmax_t dq = (1ULL << 31);
+	double dqt;
 
 	CAST_OBJ_NOTNULL(pp, priv, POOL_MAGIC);
 
@@ -494,6 +497,29 @@ pool_herder(void *priv)
 	THR_Init();
 
 	while (!pp->die || pp->nthr > 0) {
+		/*
+		 * If the worker pool is configured too small, we can
+		 * end up deadlocking it (see #2418 for details).
+		 *
+		 * Recovering from this would require a lot of complicated
+		 * code, and fundamentally, either people configured their
+		 * pools wrong, in which case we want them to notice, or
+		 * they are under DoS, in which case recovering gracefully
+		 * is unlikely be a major improvement.
+		 *
+		 * Instead we implement a watchdog and kill the worker if
+		 * nothing has been dequeued for that long.
+		 */
+		if (dq != pp->ndequeued) {
+			dq = pp->ndequeued;
+			dqt = VTIM_real();
+		} else if (pp->lqueue &&
+		    VTIM_real() - dqt > cache_param->wthread_watchdog) {
+			VSL(SLT_Error, 0,
+			   "Pool Herder: Queue does not move ql=%u dt=%f",
+			   pp->lqueue, VTIM_real() - dqt);
+			WRONG("Worker Pool Queue does not move");
+		}
 		wthread_min = cache_param->wthread_min;
 		if (pp->die)
 			wthread_min = 0;
diff --git a/bin/varnishd/common/common_param.h b/bin/varnishd/common/common_param.h
index 2366527b1..13ce0cb18 100644
--- a/bin/varnishd/common/common_param.h
+++ b/bin/varnishd/common/common_param.h
@@ -105,6 +105,7 @@ struct params {
 	double			wthread_add_delay;
 	double			wthread_fail_delay;
 	double			wthread_destroy_delay;
+	double			wthread_watchdog;
 	unsigned		wthread_stats_rate;
 	ssize_t			wthread_stacksize;
 	unsigned		wthread_queue_limit;
diff --git a/bin/varnishd/mgt/mgt_pool.c b/bin/varnishd/mgt/mgt_pool.c
index 14bceac9e..25b6efba0 100644
--- a/bin/varnishd/mgt/mgt_pool.c
+++ b/bin/varnishd/mgt/mgt_pool.c
@@ -148,6 +148,15 @@ struct parspec WRK_parspec[] = {
 		"for at least this long, will be destroyed.",
 		EXPERIMENTAL | DELAYED_EFFECT,
 		"300", "seconds" },
+	{ "thread_pool_watchdog",
+		tweak_timeout, &mgt_param.wthread_watchdog,
+		"0.1", NULL,
+		"Thread queue stuck watchdog.\n"
+		"\n"
+		"If no queued work have been released for this long,"
+		" the worker process panics itself.",
+		EXPERIMENTAL,
+		"10", "seconds" },
 	{ "thread_pool_destroy_delay",
 		tweak_timeout, &mgt_param.wthread_destroy_delay,
 		"0.01", NULL,
diff --git a/bin/varnishtest/tests/r02418.vtc b/bin/varnishtest/tests/r02418.vtc
new file mode 100644
index 000000000..a03ca546c
--- /dev/null
+++ b/bin/varnishtest/tests/r02418.vtc
@@ -0,0 +1,72 @@
+varnishtest "h2 queuing deadlock"
+
+barrier b1 cond 2
+
+# A reserve of 1 thread in a pool of 3 leaves a maximum
+# of 2 running sessions, the streams will be queued (except
+# stream 0 that is part of the h2 session).
+
+varnish v1 -cliok "param.set thread_pools 1"
+varnish v1 -cliok "param.set thread_pool_min 3"
+varnish v1 -cliok "param.set thread_pool_max 3"
+varnish v1 -cliok "param.set thread_pool_reserve 1"
+varnish v1 -cliok "param.set thread_pool_watchdog 5"
+varnish v1 -cliok "param.set feature +http2"
+varnish v1 -cliok "param.set feature +no_coredump"
+
+varnish v1 -vcl {
+	backend b1 { .host = "${bad_ip}"; }
+	sub vcl_recv {
+		return (synth(200));
+	}
+} -start
+
+logexpect l1 -v v1 -g raw {
+	expect * *	Error	"Pool Herder: Queue does not move*"
+} -start
+
+# Starve the pool with h2 sessions
+
+client c1 {
+	txpri
+	stream 0 rxsettings -run
+
+	barrier b1 sync
+
+	stream 1 {
+		txreq
+		# can't be scheduled, don't rx
+	} -run
+} -start
+
+client c2 {
+	txpri
+	stream 0 rxsettings -run
+
+	barrier b1 sync
+
+	stream 1 {
+		txreq
+		# can't be scheduled, don't rx
+	} -run
+} -start
+
+client c1 -wait
+client c2 -wait
+
+varnish v1 -vsl_catchup
+
+# At this point c1 and c2 closed their connections
+
+client c3 {
+	txreq
+	delay 10
+} -run
+
+logexpect l1 -wait
+
+varnish v1 -cliok panic.show
+varnish v1 -cliok panic.clear
+
+varnish v1 -expectexit 0x20
+
diff --git a/include/tbl/params.h b/include/tbl/params.h
index 054b2f449..42083aa5e 100644
--- a/include/tbl/params.h
+++ b/include/tbl/params.h
@@ -1195,6 +1195,24 @@ PARAM(
 	/* func */	NULL
 )
 
+/* actual location mgt_pool.c */
+PARAM(
+	/* name */	thread_pool_watchdog,
+	/* typ */	timeout,
+	/* min */	"0.1",
+	/* max */	NULL,
+	/* default */	"10.000",
+	/* units */	"seconds",
+	/* flags */	EXPERIMENTAL,
+	/* s-text */
+	"Thread queue stuck watchdog.\n"
+	"\n"
+	"If no queued work have been released for this long,"
+	" the worker process panics itself.",
+	/* l-text */	"",
+	/* func */	NULL
+)
+
 /* actual location mgt_pool.c */
 PARAM(
 	/* name */	thread_pool_destroy_delay,