[master] bd70241 Make sure we don't finish off a backend connection while the waiter is still engaged with it.

Poul-Henning Kamp phk at FreeBSD.org
Wed Jun 24 11:59:14 CEST 2015


commit bd702410005f0963dab2ed93c1407ddaa577db12
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date:   Wed Jun 24 09:56:30 2015 +0000

    Make sure we don't finish off a backend connection while the waiter
    is still engaged with it.
    
    Simplify the "do a single retry" logic slightly while here.
    
    Add more asserts.
    
    This probably fixes #1675.
    
    Initial diagnosis by:	martin

diff --git a/bin/varnishd/cache/cache_backend.c b/bin/varnishd/cache/cache_backend.c
index 5824504..f85e4ca 100644
--- a/bin/varnishd/cache/cache_backend.c
+++ b/bin/varnishd/cache/cache_backend.c
@@ -169,8 +169,12 @@ vbe_dir_finish(const struct director *d, struct worker *wrk,
 	CAST_OBJ_NOTNULL(bp, d->priv, BACKEND_MAGIC);
 
 	CHECK_OBJ_NOTNULL(bo->htc, HTTP_CONN_MAGIC);
+	CHECK_OBJ_ORNULL(bo->htc->vbc, VBC_MAGIC);
 	if (bo->htc->vbc == NULL)
 		return;
+	if (bo->htc->vbc->state != VBC_STATE_USED)
+		VBT_Wait(wrk, bo->htc->vbc);
+	CHECK_OBJ_NOTNULL(bo->htc->vbc->backend, BACKEND_MAGIC);
 	bo->htc->vbc->backend = NULL;
 	if (bo->doclose != SC_NULL) {
 		VSLb(bo->vsl, SLT_BackendClose, "%d %s", bo->htc->vbc->fd,
@@ -196,7 +200,7 @@ static int __match_proto__(vdi_gethdrs_f)
 vbe_dir_gethdrs(const struct director *d, struct worker *wrk,
     struct busyobj *bo)
 {
-	int i, extrachance = 0;
+	int i, extrachance = 1;
 	struct backend *bp;
 
 	CHECK_OBJ_NOTNULL(d, DIRECTOR_MAGIC);
@@ -204,43 +208,38 @@ vbe_dir_gethdrs(const struct director *d, struct worker *wrk,
 	CHECK_OBJ_NOTNULL(bo, BUSYOBJ_MAGIC);
 	CAST_OBJ_NOTNULL(bp, d->priv, BACKEND_MAGIC);
 
-	i = vbe_dir_getfd(wrk, d, bo);
-	if (i < 0) {
-		VSLb(bo->vsl, SLT_FetchError, "no backend connection");
-		return (-1);
-	}
-	AN(bo->htc);
-	if (bo->htc->vbc->state == VBC_STATE_STOLEN)
-		extrachance = 1;
-
-	i = V1F_fetch_hdr(wrk, bo, bp->hosthdr);
-	/*
-	 * If we recycle a backend connection, there is a finite chance
-	 * that the backend closed it before we get a request to it.
-	 * Do a single retry in that case.
-	 */
-	if (i == 1 && extrachance) {
-		vbe_dir_finish(d, wrk, bo);
-		AZ(bo->htc);
-		VSC_C_main->backend_retry++;
-		bo->doclose = SC_NULL;
+	do {
 		i = vbe_dir_getfd(wrk, d, bo);
 		if (i < 0) {
 			VSLb(bo->vsl, SLT_FetchError, "no backend connection");
-			bo->htc = NULL;
 			return (-1);
 		}
 		AN(bo->htc);
+		if (bo->htc->vbc->state != VBC_STATE_STOLEN)
+			extrachance = 0;
+
 		i = V1F_fetch_hdr(wrk, bo, bp->hosthdr);
-	}
-	if (i != 0) {
+		/*
+		 * If we recycled a backend connection, there is a finite chance
+		 * that the backend closed it before we got the bereq to it.
+		 * In that case do a single automatic retry if req.boy allows.
+		 */
+		if (i == 0) {
+			AN(bo->htc->vbc);
+			return (0);
+		}
 		vbe_dir_finish(d, wrk, bo);
-		bo->doclose = SC_NULL;
 		AZ(bo->htc);
-	} else {
-		AN(bo->htc->vbc);
-	}
-	return (i);
+		if (i < 0)
+			break;
+		if (bo->req != NULL &&
+		    bo->req->req_body_status != REQ_BODY_NONE &&
+		    bo->req->req_body_status != REQ_BODY_CACHED)
+			break;
+		VSC_C_main->backend_retry++;
+		bo->doclose = SC_NULL;
+	} while (extrachance);
+	return (-1);
 }
 
 static int __match_proto__(vdi_getbody_f)
diff --git a/bin/varnishd/cache/cache_backend_tcp.c b/bin/varnishd/cache/cache_backend_tcp.c
index 75f216d..9a658dd 100644
--- a/bin/varnishd/cache/cache_backend_tcp.c
+++ b/bin/varnishd/cache/cache_backend_tcp.c
@@ -401,11 +401,13 @@ VBT_Wait(struct worker *wrk, const struct vbc *vbc)
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	CHECK_OBJ_NOTNULL(vbc, VBC_MAGIC);
+	CHECK_OBJ_NOTNULL(vbc->backend, BACKEND_MAGIC);
 	tp = vbc->tcp_pool;
 	CHECK_OBJ_NOTNULL(tp, TCP_POOL_MAGIC);
 	assert(vbc->wrk == wrk);
 	Lck_Lock(&tp->mtx);
 	while (vbc->state == VBC_STATE_STOLEN)
 		AZ(Lck_CondWait(&wrk->cond, &tp->mtx, 0));
+	assert(vbc->state == VBC_STATE_USED);
 	Lck_Unlock(&tp->mtx);
 }
diff --git a/bin/varnishd/http1/cache_http1_fetch.c b/bin/varnishd/http1/cache_http1_fetch.c
index 106fd03..12f18f1 100644
--- a/bin/varnishd/http1/cache_http1_fetch.c
+++ b/bin/varnishd/http1/cache_http1_fetch.c
@@ -80,7 +80,6 @@ V1F_fetch_hdr(struct worker *wrk, struct busyobj *bo, const char *def_host)
 {
 	struct http *hp;
 	enum htc_status_e hs;
-	int retry = 1;
 	int j, first;
 	ssize_t i;
 	struct http_conn *htc;
@@ -120,14 +119,12 @@ V1F_fetch_hdr(struct worker *wrk, struct busyobj *bo, const char *def_host)
 			V1L_Chunked(wrk);
 		i = VRB_Iterate(bo->req, vbf_iter_req_body, bo);
 
-		if (bo->req->req_body_status == REQ_BODY_TAKEN) {
-			retry = -1;
-		} else if (bo->req->req_body_status == REQ_BODY_FAIL) {
+		if (bo->req->req_body_status == REQ_BODY_FAIL) {
+			assert(i < 0);
 			VSLb(bo->vsl, SLT_FetchError,
 			    "req.body read error: %d (%s)",
 			    errno, strerror(errno));
 			bo->req->doclose = SC_RX_BODY;
-			retry = -1;
 		}
 		if (do_chunked)
 			V1L_EndChunk(wrk);
@@ -139,7 +136,7 @@ V1F_fetch_hdr(struct worker *wrk, struct busyobj *bo, const char *def_host)
 		    errno, strerror(errno));
 		VSLb_ts_busyobj(bo, "Bereq", W_TIM_real(wrk));
 		bo->doclose = SC_TX_ERROR;
-		return (retry);
+		return (1);
 	}
 	VSLb_ts_busyobj(bo, "Bereq", W_TIM_real(wrk));
 
@@ -181,10 +178,9 @@ V1F_fetch_hdr(struct worker *wrk, struct busyobj *bo, const char *def_host)
 			VSLb(bo->vsl, SLT_FetchError, "http %sread error: EOF",
 			    first ? "first " : "");
 			bo->doclose = SC_RX_TIMEOUT;
-			return (retry);
+			return (first ? 1 : -1);
 		}
 		if (first) {
-			retry = -1;
 			first = 0;
 			VTCP_set_read_timeout(htc->fd,
 			    htc->between_bytes_timeout);



More information about the varnish-commit mailing list