[PATCH] Remember to signal the thread to exit when decimating the flock

Martin Blix Grydeland martin at varnish-software.com
Wed Apr 30 13:51:12 CEST 2014


The pool herder didn't signal the thread when decimating the flock,
causing the thread to be leaked.

Spotted by: xcir
Fixes: #1490

Also close a couple of other races:

When pthread_cond_timedwait returns ETIMEDOUT, the predicate
(wrk->task.func == NULL) could still have changed while reacquiring
the mutex. If so, the signal would've been lost and the thread
leaked. Changed the idle wait loop in Pool_Work_Thread() to always
check the predicate before resuming the cond_wait.

The update of the predicate (wrk->task.func) from e.g. Pool_Task() is
done without holding the mutex. This opens a race on the predicate,
and the possibility of the worker going back on cond_wait loosing the
signal. Changed to update the predicate while holding the mutex.
---
 bin/varnishd/cache/cache_pool.c  | 31 +++++++++++++++++++++----------
 bin/varnishtest/tests/r01490.vtc | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 10 deletions(-)
 create mode 100644 bin/varnishtest/tests/r01490.vtc

diff --git a/bin/varnishd/cache/cache_pool.c b/bin/varnishd/cache/cache_pool.c
index 75fac47..fbfb993 100644
--- a/bin/varnishd/cache/cache_pool.c
+++ b/bin/varnishd/cache/cache_pool.c
@@ -163,12 +163,12 @@ pool_accept(struct worker *wrk, void *arg)
 		}
 		VTAILQ_REMOVE(&pp->idle_queue, &wrk2->task, list);
 		AZ(wrk2->task.func);
-		Lck_Unlock(&pp->mtx);
 		assert(sizeof *wa2 == WS_Reserve(wrk2->aws, sizeof *wa2));
 		wa2 = (void*)wrk2->aws->f;
 		memcpy(wa2, wa, sizeof *wa);
 		wrk2->task.func = SES_pool_accept_task;
 		wrk2->task.priv = pp->sesspool;
+		Lck_Unlock(&pp->mtx);
 		AZ(pthread_cond_signal(&wrk2->cond));
 
 		/*
@@ -204,9 +204,9 @@ Pool_Task(struct pool *pp, struct pool_task *task, enum pool_how how)
 	if (wrk != NULL) {
 		VTAILQ_REMOVE(&pp->idle_queue, &wrk->task, list);
 		AZ(wrk->task.func);
-		Lck_Unlock(&pp->mtx);
 		wrk->task.func = task->func;
 		wrk->task.priv = task->priv;
+		Lck_Unlock(&pp->mtx);
 		AZ(pthread_cond_signal(&wrk->cond));
 		return (0);
 	}
@@ -237,6 +237,17 @@ Pool_Task(struct pool *pp, struct pool_task *task, enum pool_how how)
 }
 
 /*--------------------------------------------------------------------
+ * Empty function used as a pointer value for the thread exit condition.
+ */
+
+static void
+pool_kiss_of_death(struct worker *wrk, void *priv)
+{
+	(void)wrk;
+	(void)priv;
+}
+
+/*--------------------------------------------------------------------
  * This is the work function for worker threads in the pool.
  */
 
@@ -274,7 +285,6 @@ Pool_Work_Thread(void *priv, struct worker *wrk)
 				wrk->lastused = VTIM_real();
 			wrk->task.func = NULL;
 			wrk->task.priv = wrk;
-			AZ(wrk->task.func);
 			VTAILQ_INSERT_HEAD(&pp->idle_queue, &wrk->task, list);
 			if (!stats_clean)
 				WRK_SumStat(wrk);
@@ -283,12 +293,12 @@ Pool_Work_Thread(void *priv, struct worker *wrk)
 				    wrk->vcl == NULL ?  0 : wrk->lastused+60.);
 				if (i == ETIMEDOUT)
 					VCL_Rel(&wrk->vcl);
-			} while (i);
+			} while (wrk->task.func == NULL);
 			tp = &wrk->task;
 		}
 		Lck_Unlock(&pp->mtx);
 
-		if (tp->func == NULL)
+		if (tp->func == pool_kiss_of_death)
 			break;
 
 		assert(wrk->pool == pp);
@@ -387,23 +397,24 @@ pool_herder(void *priv)
 				CAST_OBJ_NOTNULL(wrk, pt->priv, WORKER_MAGIC);
 
 				if (wrk->lastused < t_idle ||
-				    pp->nthr > cache_param->wthread_max)
+				    pp->nthr > cache_param->wthread_max) {
+					/* Give it a kiss on the cheek... */
 					VTAILQ_REMOVE(&pp->idle_queue,
 					    &wrk->task, list);
-				else
+					wrk->task.func = pool_kiss_of_death;
+					AZ(pthread_cond_signal(&wrk->cond));
+				} else
 					wrk = NULL;
 			}
 			Lck_Unlock(&pp->mtx);
 
-			/* And give it a kiss on the cheek... */
 			if (wrk != NULL) {
+				/* Update counters */
 				pp->nthr--;
 				Lck_Lock(&pool_mtx);
 				VSC_C_main->threads--;
 				VSC_C_main->threads_destroyed++;
 				Lck_Unlock(&pool_mtx);
-				wrk->task.func = NULL;
-				wrk->task.priv = NULL;
 				VTIM_sleep(cache_param->wthread_destroy_delay);
 				continue;
 			}
diff --git a/bin/varnishtest/tests/r01490.vtc b/bin/varnishtest/tests/r01490.vtc
new file mode 100644
index 0000000..82ffdb4
--- /dev/null
+++ b/bin/varnishtest/tests/r01490.vtc
@@ -0,0 +1,38 @@
+varnishtest "#1490 - thread destruction"
+
+server s1 {
+} -start
+
+varnish v1 \
+	-arg "-p debug=+syncvsl" \
+	-arg "-p vsl_mask=+WorkThread" \
+	-arg "-p thread_pool_min=2" \
+	-arg "-p thread_pool_max=3" \
+	-arg "-p thread_pools=1" \
+	-vcl+backend {}
+varnish v1 -start
+
+varnish v1 -expect threads == 2
+
+logexpect l1 -v v1 -g raw {
+	expect * 0 WorkThread {^\S+ start$}
+	expect * 0 WorkThread {^\S+ end$}
+} -start
+
+varnish v1 -cliok "param.set thread_pool_min 3"
+
+# Herder thread sleeps 5 seconds. Have to wait longer than that.
+delay 6
+
+varnish v1 -expect threads == 3
+
+varnish v1 -cliok "param.set thread_pool_min 2"
+varnish v1 -cliok "param.set thread_pool_max 2"
+
+# Herder thread sleeps 5 seconds. Have to wait longer than that.
+delay 6
+
+varnish v1 -expect threads == 2
+
+# Use logexpect to see that the thread actually exited
+logexpect l1 -wait
-- 
1.9.2




More information about the varnish-dev mailing list