[PATCH] Remember to signal the thread to exit when decimating the flock
Martin Blix Grydeland
martin at varnish-software.com
Wed Apr 30 13:51:12 CEST 2014
The pool herder didn't signal the thread when decimating the flock,
causing the thread to be leaked.
Spotted by: xcir
Fixes: #1490
Also close a couple of other races:
When pthread_cond_timedwait returns ETIMEDOUT, the predicate
(wrk->task.func == NULL) could still have changed while reacquiring
the mutex. If so, the signal would've been lost and the thread
leaked. Changed the idle wait loop in Pool_Work_Thread() to always
check the predicate before resuming the cond_wait.
The update of the predicate (wrk->task.func) from e.g. Pool_Task() is
done without holding the mutex. This opens a race on the predicate,
and the possibility of the worker going back on cond_wait loosing the
signal. Changed to update the predicate while holding the mutex.
---
bin/varnishd/cache/cache_pool.c | 31 +++++++++++++++++++++----------
bin/varnishtest/tests/r01490.vtc | 38 ++++++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+), 10 deletions(-)
create mode 100644 bin/varnishtest/tests/r01490.vtc
diff --git a/bin/varnishd/cache/cache_pool.c b/bin/varnishd/cache/cache_pool.c
index 75fac47..fbfb993 100644
--- a/bin/varnishd/cache/cache_pool.c
+++ b/bin/varnishd/cache/cache_pool.c
@@ -163,12 +163,12 @@ pool_accept(struct worker *wrk, void *arg)
}
VTAILQ_REMOVE(&pp->idle_queue, &wrk2->task, list);
AZ(wrk2->task.func);
- Lck_Unlock(&pp->mtx);
assert(sizeof *wa2 == WS_Reserve(wrk2->aws, sizeof *wa2));
wa2 = (void*)wrk2->aws->f;
memcpy(wa2, wa, sizeof *wa);
wrk2->task.func = SES_pool_accept_task;
wrk2->task.priv = pp->sesspool;
+ Lck_Unlock(&pp->mtx);
AZ(pthread_cond_signal(&wrk2->cond));
/*
@@ -204,9 +204,9 @@ Pool_Task(struct pool *pp, struct pool_task *task, enum pool_how how)
if (wrk != NULL) {
VTAILQ_REMOVE(&pp->idle_queue, &wrk->task, list);
AZ(wrk->task.func);
- Lck_Unlock(&pp->mtx);
wrk->task.func = task->func;
wrk->task.priv = task->priv;
+ Lck_Unlock(&pp->mtx);
AZ(pthread_cond_signal(&wrk->cond));
return (0);
}
@@ -237,6 +237,17 @@ Pool_Task(struct pool *pp, struct pool_task *task, enum pool_how how)
}
/*--------------------------------------------------------------------
+ * Empty function used as a pointer value for the thread exit condition.
+ */
+
+static void
+pool_kiss_of_death(struct worker *wrk, void *priv)
+{
+ (void)wrk;
+ (void)priv;
+}
+
+/*--------------------------------------------------------------------
* This is the work function for worker threads in the pool.
*/
@@ -274,7 +285,6 @@ Pool_Work_Thread(void *priv, struct worker *wrk)
wrk->lastused = VTIM_real();
wrk->task.func = NULL;
wrk->task.priv = wrk;
- AZ(wrk->task.func);
VTAILQ_INSERT_HEAD(&pp->idle_queue, &wrk->task, list);
if (!stats_clean)
WRK_SumStat(wrk);
@@ -283,12 +293,12 @@ Pool_Work_Thread(void *priv, struct worker *wrk)
wrk->vcl == NULL ? 0 : wrk->lastused+60.);
if (i == ETIMEDOUT)
VCL_Rel(&wrk->vcl);
- } while (i);
+ } while (wrk->task.func == NULL);
tp = &wrk->task;
}
Lck_Unlock(&pp->mtx);
- if (tp->func == NULL)
+ if (tp->func == pool_kiss_of_death)
break;
assert(wrk->pool == pp);
@@ -387,23 +397,24 @@ pool_herder(void *priv)
CAST_OBJ_NOTNULL(wrk, pt->priv, WORKER_MAGIC);
if (wrk->lastused < t_idle ||
- pp->nthr > cache_param->wthread_max)
+ pp->nthr > cache_param->wthread_max) {
+ /* Give it a kiss on the cheek... */
VTAILQ_REMOVE(&pp->idle_queue,
&wrk->task, list);
- else
+ wrk->task.func = pool_kiss_of_death;
+ AZ(pthread_cond_signal(&wrk->cond));
+ } else
wrk = NULL;
}
Lck_Unlock(&pp->mtx);
- /* And give it a kiss on the cheek... */
if (wrk != NULL) {
+ /* Update counters */
pp->nthr--;
Lck_Lock(&pool_mtx);
VSC_C_main->threads--;
VSC_C_main->threads_destroyed++;
Lck_Unlock(&pool_mtx);
- wrk->task.func = NULL;
- wrk->task.priv = NULL;
VTIM_sleep(cache_param->wthread_destroy_delay);
continue;
}
diff --git a/bin/varnishtest/tests/r01490.vtc b/bin/varnishtest/tests/r01490.vtc
new file mode 100644
index 0000000..82ffdb4
--- /dev/null
+++ b/bin/varnishtest/tests/r01490.vtc
@@ -0,0 +1,38 @@
+varnishtest "#1490 - thread destruction"
+
+server s1 {
+} -start
+
+varnish v1 \
+ -arg "-p debug=+syncvsl" \
+ -arg "-p vsl_mask=+WorkThread" \
+ -arg "-p thread_pool_min=2" \
+ -arg "-p thread_pool_max=3" \
+ -arg "-p thread_pools=1" \
+ -vcl+backend {}
+varnish v1 -start
+
+varnish v1 -expect threads == 2
+
+logexpect l1 -v v1 -g raw {
+ expect * 0 WorkThread {^\S+ start$}
+ expect * 0 WorkThread {^\S+ end$}
+} -start
+
+varnish v1 -cliok "param.set thread_pool_min 3"
+
+# Herder thread sleeps 5 seconds. Have to wait longer than that.
+delay 6
+
+varnish v1 -expect threads == 3
+
+varnish v1 -cliok "param.set thread_pool_min 2"
+varnish v1 -cliok "param.set thread_pool_max 2"
+
+# Herder thread sleeps 5 seconds. Have to wait longer than that.
+delay 6
+
+varnish v1 -expect threads == 2
+
+# Use logexpect to see that the thread actually exited
+logexpect l1 -wait
--
1.9.2
More information about the varnish-dev
mailing list