[master] 3c67da447 varnishtest: Make barriers reactive on error

Dridi Boukelmoune dridi.boukelmoune at gmail.com
Wed Jun 23 07:00:09 UTC 2021


commit 3c67da44753cccbf5db4d2bef881abe5b7c817f4
Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
Date:   Wed Jun 23 08:06:22 2021 +0200

    varnishtest: Make barriers reactive on error
    
    A failing test may take up to the vtc timeout to effectively fail if a
    thread is waiting for a barrier to sync. Barriers now check every 100ms
    whether the vtc is done to bail out.
    
    The socket barriers were already polling for 1s, so now they poll for
    100ms and leave the loop early when needed.
    
    The condvar barriers now loop over a timedwait, but critical sections
    were too wide. There were vtc_fatal() calls under barrier locks that
    would defeat the timedwait loop.
    
    Thus critical sections were more carefully laid, allowing multiple
    barrier commands to run simultaneously. This also means that besides
    initialization, socket barriers no longer touch the lock.
    
    Consider the following diff:
    
        --- bin/varnishtest/tests/v00056.vtc
        +++ bin/varnishtest/tests/v00056.vtc
        @@ -72,7 +72,7 @@ client c2 {
         } -run
    
         logexpect l2 -v v1 -g raw -q Backend_health {
        -       expect 0 0 Backend_health "default Went healthy"
        +       expect 0 0 Backend_health "default Went wrong"
         } -start
    
         barrier b3 sync
    
    With this change l2 would fail early, typically in less than 2s, but the
    barriers stuck in a condwait would prevent further progress.

diff --git a/bin/varnishtest/vtc_barrier.c b/bin/varnishtest/vtc_barrier.c
index 3a83b771d..25e63bb86 100644
--- a/bin/varnishtest/vtc_barrier.c
+++ b/bin/varnishtest/vtc_barrier.c
@@ -41,6 +41,7 @@
 
 #include "vtc.h"
 #include "vtcp.h"
+#include "vtim.h"
 #include "vsa.h"
 
 enum barrier_e {
@@ -119,8 +120,10 @@ barrier_cond(struct barrier *b, const char *av, struct vtclog *vl)
 {
 
 	CHECK_OBJ_NOTNULL(b, BARRIER_MAGIC);
+	AZ(pthread_mutex_lock(&b->mtx));
 	barrier_expect(b, av, vl);
 	b->type = BARRIER_COND;
+	AZ(pthread_mutex_unlock(&b->mtx));
 }
 
 static void *
@@ -170,11 +173,11 @@ barrier_sock_thread(void *priv)
 	conns = calloc(b->expected, sizeof *conns);
 	AN(conns);
 
-	while (b->active) {
+	while (b->active && !vtc_stop && !vtc_error) {
 		pfd[0].fd = sock;
 		pfd[0].events = POLLIN;
 
-		i = poll(pfd, 1, 1000);
+		i = poll(pfd, 1, 100);
 		if (i == 0)
 			continue;
 		if (i < 0) {
@@ -222,6 +225,19 @@ barrier_sock_thread(void *priv)
 			b->active = 0;
 	}
 
+	if (vtc_stop || vtc_error) {
+		AN(b->active);
+		/* wake up outstanding waiters */
+		for (i = 0; i < b->waiters; i++)
+			closefd(&conns[i]);
+		b->waiters = 0;
+		b->active = 0;
+	}
+
+	if (b->cyclic && b->waiters > 0)
+		vtc_fatal(vl, "Barrier(%s) has %u outstanding waiters",
+		    b->name, b->waiters);
+
 	macro_undef(vl, b->name, "addr");
 	macro_undef(vl, b->name, "port");
 	macro_undef(vl, b->name, "sock");
@@ -237,6 +253,7 @@ barrier_sock(struct barrier *b, const char *av, struct vtclog *vl)
 {
 
 	CHECK_OBJ_NOTNULL(b, BARRIER_MAGIC);
+	AZ(pthread_mutex_lock(&b->mtx));
 	barrier_expect(b, av, vl);
 	b->type = BARRIER_SOCK;
 	b->active = 1;
@@ -247,23 +264,33 @@ barrier_sock(struct barrier *b, const char *av, struct vtclog *vl)
 	 */
 	AZ(pthread_create(&b->thread, NULL, barrier_sock_thread, b));
 	AZ(pthread_cond_wait(&b->cond, &b->mtx));
+	AZ(pthread_mutex_unlock(&b->mtx));
 }
 
 static void
 barrier_cyclic(struct barrier *b, struct vtclog *vl)
 {
+	enum barrier_e t;
+	int w;
 
 	CHECK_OBJ_NOTNULL(b, BARRIER_MAGIC);
 
-	if (b->type == BARRIER_NONE)
+	AZ(pthread_mutex_lock(&b->mtx));
+	t = b->type;
+	w = b->waiters;
+	AZ(pthread_mutex_unlock(&b->mtx));
+
+	if (t == BARRIER_NONE)
 		vtc_fatal(vl,
 		    "Barrier(%s) use error: not initialized", b->name);
 
-	if (b->waiters != 0)
+	if (w != 0)
 		vtc_fatal(vl,
 		    "Barrier(%s) use error: already in use", b->name);
 
+	AZ(pthread_mutex_lock(&b->mtx));
 	b->cyclic = 1;
+	AZ(pthread_mutex_unlock(&b->mtx));
 }
 
 /**********************************************************************
@@ -273,17 +300,29 @@ barrier_cyclic(struct barrier *b, struct vtclog *vl)
 static void
 barrier_cond_sync(struct barrier *b, struct vtclog *vl)
 {
+	struct timespec ts;
+	int r, w;
 
 	CHECK_OBJ_NOTNULL(b, BARRIER_MAGIC);
 	assert(b->type == BARRIER_COND);
 
-	assert(b->waiters <= b->expected);
-	if (b->waiters == b->expected)
+	AZ(pthread_mutex_lock(&b->mtx));
+	w = b->waiters;
+	assert(w <= b->expected);
+
+	if (w == b->expected)
+		w = -1;
+	else
+		b->waiters = ++w;
+	AZ(pthread_mutex_unlock(&b->mtx));
+
+	if (w < 0)
 		vtc_fatal(vl,
 		    "Barrier(%s) use error: more waiters than the %u expected",
 		    b->name, b->expected);
 
-	if (++b->waiters == b->expected) {
+	AZ(pthread_mutex_lock(&b->mtx));
+	if (w == b->expected) {
 		vtc_log(vl, 4, "Barrier(%s) wake %u", b->name, b->expected);
 		if (b->cyclic)
 			b->waiters = 0;
@@ -291,8 +330,13 @@ barrier_cond_sync(struct barrier *b, struct vtclog *vl)
 	} else {
 		vtc_log(vl, 4, "Barrier(%s) wait %u of %u",
 		    b->name, b->waiters, b->expected);
-		AZ(pthread_cond_wait(&b->cond, &b->mtx));
+		do {
+			ts = VTIM_timespec(VTIM_real() + .1);
+			r = pthread_cond_timedwait(&b->cond, &b->mtx, &ts);
+			assert(r == 0 || r == ETIMEDOUT);
+		} while (!vtc_stop && !vtc_error && r == ETIMEDOUT);
 	}
+	AZ(pthread_mutex_unlock(&b->mtx));
 }
 
 static void
@@ -317,11 +361,7 @@ barrier_sock_sync(struct barrier *b, struct vtclog *vl)
 
 	VSB_destroy(&vsb);
 
-	/* emulate pthread_cond_wait's behavior */
-	AZ(pthread_mutex_unlock(&b->mtx));
 	sz = read(sock, buf, sizeof buf); /* XXX loop with timeout? */
-	AZ(pthread_mutex_lock(&b->mtx));
-
 	i = errno;
 	closefd(&sock);
 
@@ -420,7 +460,6 @@ cmd_barrier(CMD_ARGS)
 				break;
 			case BARRIER_SOCK:
 				if (b->need_join) {
-					b->active = 0;
 					AZ(pthread_join(b->thread, NULL));
 					b->need_join = 0;
 				}
@@ -445,7 +484,6 @@ cmd_barrier(CMD_ARGS)
 		b = barrier_new(av[0], vl);
 	av++;
 
-	AZ(pthread_mutex_lock(&b->mtx));
 	for (; *av != NULL; av++) {
 		if (!strcmp(*av, "cond")) {
 			av++;
@@ -469,5 +507,4 @@ cmd_barrier(CMD_ARGS)
 		}
 		vtc_fatal(vl, "Unknown barrier argument: %s", *av);
 	}
-	AZ(pthread_mutex_unlock(&b->mtx));
 }


More information about the varnish-commit mailing list