[master] f0ee94ecf vbe: Try fetching beresp when sending bereq failed

Dridi Boukelmoune dridi.boukelmoune at gmail.com
Tue May 31 08:02:05 UTC 2022


commit f0ee94ecf548e936ce64c0cfb484877dfb0e4b88
Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
Date:   Thu Dec 16 16:56:06 2021 +0100

    vbe: Try fetching beresp when sending bereq failed
    
    There is no cancellation mechanism in the middle of an HTTP/1
    transaction besides closing the session. If a backend closes
    the connection before the end of the bereq delivery, it may
    also send a 4XX or 5XX response that could be valuable for the
    original client.
    
    There are several things to take into consideration: it is
    the responsibility of the backend implementation to raise the
    bo->send_failed flag; if the backend replied something before
    closing, we shouldn't trigger a timeout; the connection must
    not be recycled; and the object must not be cached.
    
    Fixes #2332
    Closes #3686
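
An illustrative plain-C model of the two decisions this change adds; the
function names below are made up for the example and are not Varnish API.
First, V1F_SendReq() only raises bo->send_failed when at least the request
headers went out, because a backend cannot answer a request it never saw.
Second, once the flag is raised, vbe_dir_gethdrs() and V1F_FetchRespHdr()
poll for an already received response instead of arming first_byte_timeout.

#include <stdbool.h>
#include <stddef.h>

static bool
model_send_failed(size_t bytes_written, size_t hdrbytes)
{

	/* evaluated only after the write itself failed: raise the
	 * flag iff the request headers were fully sent */
	return (bytes_written >= hdrbytes);
}

static double
model_fetch_deadline(double now, double first_byte_timeout, bool send_failed)
{

	/* no timeout credit once sending failed: either the backend
	 * already replied before closing, or we give up immediately */
	if (send_failed)
		return (now);
	return (now + first_byte_timeout);
}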

diff --git a/bin/varnishd/cache/cache_backend.c b/bin/varnishd/cache/cache_backend.c
index 7ac117cc4..0e328bb18 100644
--- a/bin/varnishd/cache/cache_backend.c
+++ b/bin/varnishd/cache/cache_backend.c
@@ -243,7 +243,8 @@ vbe_dir_finish(VRT_CTX, VCL_BACKEND d)
 		AZ(pfd);
 		Lck_Lock(bp->director->mtx);
 	} else {
-		assert (PFD_State(pfd) == PFD_STATE_USED);
+		assert(PFD_State(pfd) == PFD_STATE_USED);
+		AZ(bo->send_failed);
 		VSLb(bo->vsl, SLT_BackendClose, "%d %s recycle", *PFD_Fd(pfd),
 		    VRT_BACKEND_string(d));
 		Lck_Lock(bp->director->mtx);
@@ -268,6 +269,7 @@ vbe_dir_gethdrs(VRT_CTX, VCL_BACKEND d)
 	struct pfd *pfd;
 	struct busyobj *bo;
 	struct worker *wrk;
+	stream_close_t sc;
 
 	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
 	CHECK_OBJ_NOTNULL(d, DIRECTOR_MAGIC);
@@ -275,6 +277,7 @@ vbe_dir_gethdrs(VRT_CTX, VCL_BACKEND d)
 	CHECK_OBJ_NOTNULL(bo, BUSYOBJ_MAGIC);
 	if (bo->htc != NULL)
 		CHECK_OBJ_NOTNULL(bo->htc->doclose, STREAM_CLOSE_MAGIC);
+	AZ(bo->send_failed);
 	wrk = ctx->bo->wrk;
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	CAST_OBJ_NOTNULL(bp, d->priv, BACKEND_MAGIC);
@@ -301,8 +304,10 @@ vbe_dir_gethdrs(VRT_CTX, VCL_BACKEND d)
 		i = V1F_SendReq(wrk, bo, &bo->acct.bereq_hdrbytes,
 		    &bo->acct.bereq_bodybytes);
 
-		if (i == 0 && PFD_State(pfd) != PFD_STATE_USED) {
-			if (VCP_Wait(wrk, pfd, VTIM_real() +
+		if (PFD_State(pfd) != PFD_STATE_USED) {
+			if (bo->send_failed)
+				(void)VCP_Wait(wrk, pfd, VTIM_real());
+			else if (VCP_Wait(wrk, pfd, VTIM_real() +
 			    bo->htc->first_byte_timeout) != 0) {
 				bo->htc->doclose = SC_RX_TIMEOUT;
 				VSLb(bo->vsl, SLT_FetchError,
@@ -311,17 +316,25 @@ vbe_dir_gethdrs(VRT_CTX, VCL_BACKEND d)
 			}
 		}
 
-		if (bo->htc->doclose == SC_NULL) {
+		if (bo->htc->doclose == SC_NULL)
 			assert(PFD_State(pfd) == PFD_STATE_USED);
+
+		sc = bo->htc->doclose;
+		if (i == 0 || bo->send_failed) {
+			i = V1F_FetchRespHdr(bo);
 			if (i == 0)
-				i = V1F_FetchRespHdr(bo);
-			if (i == 0) {
 				AN(bo->htc->priv);
-				return (0);
-			}
 		}
 		CHECK_OBJ_NOTNULL(bo->htc->doclose, STREAM_CLOSE_MAGIC);
 
+		if (bo->send_failed) {
+			assert(sc != SC_NULL);
+			bo->htc->doclose = sc;
+		}
+
+		if (i == 0)
+			return (0);
+
 		/*
 		 * If we recycled a backend connection, there is a finite chance
 		 * that the backend closed it before we got the bereq to it.
diff --git a/bin/varnishd/cache/cache_fetch.c b/bin/varnishd/cache/cache_fetch.c
index fde7e29a9..047dec969 100644
--- a/bin/varnishd/cache/cache_fetch.c
+++ b/bin/varnishd/cache/cache_fetch.c
@@ -330,6 +330,7 @@ vbf_stp_retry(struct worker *wrk, struct busyobj *bo)
 	bo->do_esi = 0;
 	bo->do_stream = 1;
 	bo->was_304 = 0;
+	bo->send_failed = 0;
 	bo->err_code = 0;
 	bo->err_reason = NULL;
 	if (bo->htc != NULL)
@@ -547,6 +548,8 @@ vbf_stp_startfetch(struct worker *wrk, struct busyobj *bo)
 	}
 	if (bo->uncacheable)
 		oc->flags |= OC_F_HFM;
+	if (bo->send_failed)
+		HSH_Kill(oc);
 
 	assert(wrk->handling == VCL_RET_DELIVER);
 
@@ -746,7 +749,7 @@ vbf_stp_fetchend(struct worker *wrk, struct busyobj *bo)
 
 	ObjSetState(wrk, oc, BOS_FINISHED);
 	VSLb_ts_busyobj(bo, "BerespBody", W_TIM_real(wrk));
-	if (bo->stale_oc != NULL)
+	if (bo->stale_oc != NULL && !bo->send_failed)
 		HSH_Kill(bo->stale_oc);
 	return (F_STP_DONE);
 }
diff --git a/bin/varnishd/http1/cache_http1_fetch.c b/bin/varnishd/http1/cache_http1_fetch.c
index 7d0e6c015..8a3245f4e 100644
--- a/bin/varnishd/http1/cache_http1_fetch.c
+++ b/bin/varnishd/http1/cache_http1_fetch.c
@@ -119,15 +119,6 @@ V1F_SendReq(struct worker *wrk, struct busyobj *bo, uint64_t *ctr_hdrbytes,
 			bo->no_retry = "req.body not cached";
 
 		if (bo->req->req_body_status == BS_ERROR) {
-			/*
-			 * XXX: (#2332) We should test to see if the backend
-			 * XXX: sent us some headers explaining why.
-			 * XXX: This is hard because of the mistaken API split
-			 * XXX: between cache_backend.c and V1F, and therefore
-			 * XXX: Parked in this comment, pending renovation of
-			 * XXX: the VDI/backend-protocol API to allow non-H1
-			 * XXX: backends.
-			 */
 			assert(i < 0);
 			VSLb(bo->vsl, SLT_FetchError,
 			    "req.body read error: %d (%s)",
@@ -158,10 +149,15 @@ V1F_SendReq(struct worker *wrk, struct busyobj *bo, uint64_t *ctr_hdrbytes,
 		    errno, VAS_errtxt(errno), sc->desc);
 		VSLb_ts_busyobj(bo, "Bereq", W_TIM_real(wrk));
 		htc->doclose = sc;
+		/* NB: only raise the flag if we managed to at least send
+		 * the request headers.
+		 */
+		bo->send_failed = bytes >= hdrbytes;
 		return (-1);
 	}
 	CHECK_OBJ_NOTNULL(sc, STREAM_CLOSE_MAGIC);
 	VSLb_ts_busyobj(bo, "Bereq", W_TIM_real(wrk));
+	bo->send_failed = 0;
 	return (0);
 }
 
@@ -171,7 +167,7 @@ V1F_FetchRespHdr(struct busyobj *bo)
 
 	struct http *hp;
 	int i;
-	double t;
+	vtim_real t;
 	struct http_conn *htc;
 	enum htc_status_e hs;
 
@@ -190,7 +186,9 @@ V1F_FetchRespHdr(struct busyobj *bo)
 	CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
 	CHECK_OBJ_NOTNULL(bo->htc, HTTP_CONN_MAGIC);
 
-	t = VTIM_real() + htc->first_byte_timeout;
+	t = VTIM_real();
+	if (!bo->send_failed)
+		t += htc->first_byte_timeout;
 	hs = HTC_RxStuff(htc, HTTP1_Complete, NULL, NULL,
 	    t, NAN, htc->between_bytes_timeout, cache_param->http_resp_size);
 	if (hs != HTC_S_COMPLETE) {
@@ -214,6 +212,8 @@ V1F_FetchRespHdr(struct busyobj *bo)
 			htc->doclose = SC_RX_OVERFLOW;
 			break;
 		case HTC_S_IDLE:
+			if (bo->send_failed)
+				break;
 			VSLb(bo->vsl, SLT_FetchError, "first byte timeout");
 			htc->doclose = SC_RX_TIMEOUT;
 			break;
diff --git a/bin/varnishtest/tests/b00073.vtc b/bin/varnishtest/tests/b00073.vtc
index 3372ba094..3aea0207e 100644
--- a/bin/varnishtest/tests/b00073.vtc
+++ b/bin/varnishtest/tests/b00073.vtc
@@ -27,13 +27,20 @@ server s1 {
 	expect req.http.unset-connection == true
 	txresp
 	expect_close
+
+	accept
+	rxreq
 } -start
 
 varnish v1 -vcl+backend {
+	backend bad { .host = "${bad_backend}"; }
 	sub vcl_recv {
 		return (pass);
 	}
 	sub vcl_backend_fetch {
+		if (bereq.http.fail == "send") {
+			set bereq.backend = bad;
+		}
 		set bereq.http.connection = bereq.http.bereq-connection;
 	}
 	sub vcl_backend_response {
@@ -61,9 +68,19 @@ client c1 {
 	txreq -hdr "bereq-connection: close" -hdr "unset-connection: true"
 	rxresp
 	expect resp.status == 200
+
+	txreq -hdr "fail: fetch"
+	rxresp
+	expect resp.status == 503
 } -run
 
 server s1 -wait
 
+client c2 {
+	txreq -hdr "fail: send"
+	rxresp
+	expect resp.status == 503
+} -run
+
 varnish v1 -expect MAIN.backend_recycle == 0
 varnish v1 -expect VBE.vcl1.s1.conn == 0
diff --git a/bin/varnishtest/tests/f00001.vtc b/bin/varnishtest/tests/f00001.vtc
index 2c756c9cd..9d52b06fc 100644
--- a/bin/varnishtest/tests/f00001.vtc
+++ b/bin/varnishtest/tests/f00001.vtc
@@ -4,7 +4,7 @@ varnishtest "Check that we handle bogusly large chunks correctly"
 
 server s1 {
 	rxreq
-	txresp
+	txresp -status 400
 } -start
 
 varnish v1 -vcl+backend {
@@ -18,7 +18,7 @@ client c1 {
 	send "0\r\n\r\n"
 
 	rxresp
-	expect resp.status == 503
+	expect resp.status == 400
 } -run
 
 # Check that the published workaround does not cause harm
diff --git a/bin/varnishtest/tests/r02332.vtc b/bin/varnishtest/tests/r02332.vtc
new file mode 100644
index 000000000..b76d8a3d2
--- /dev/null
+++ b/bin/varnishtest/tests/r02332.vtc
@@ -0,0 +1,99 @@
+varnishtest "Fetch beresp despite failure to send bereq"
+
+barrier b1 cond 2
+barrier b2 cond 2
+
+# bo->send_failed
+
+server s1 {
+	rxreqhdrs
+	txresp -status 400 -body "Invalid request"
+} -start
+
+varnish v1 -vcl+backend {
+	sub vcl_backend_fetch {
+		if (bereq.http.ignore == "bgfetch") {
+			return (abandon);
+		}
+		return (fetch); # don't unset bereq.body
+	}
+} -start
+
+logexpect l1 -v v1 -g raw -i BackendClose {
+	expect 0 1002 BackendClose "s1 close"
+	expect 1 1007 BackendClose "s1 close"
+} -start
+
+client c1 {
+	txreq -method POST -hdr "Transfer-Encoding: chunked"
+	chunkedlen 100
+	barrier b1 sync
+	non_fatal
+	chunkedlen 100
+	chunkedlen 0
+	fatal
+	rxresp
+	expect resp.status == 400
+	expect resp.body == "Invalid request"
+} -start
+
+server s1 -wait
+barrier b1 sync
+client c1 -wait
+
+# bo->send_failed && 304
+
+server s1 {
+	rxreq
+	txresp -hdr "Cache-Control: public, max-age=1" -hdr {Etag: "abc"} \
+	    -hdr "version: original" -body can-revalidate
+
+	rxreqhdrs
+	expect req.http.if-none-match == {"abc"}
+	txresp -status 304 -hdr "Cache-Control: public" -hdr {Etag: "abc"} \
+	    -hdr "version: refreshed"
+} -start
+
+client c2 {
+	txreq -hdr "Transfer-Encoding: chunked"
+	chunkedlen 100
+	chunkedlen 100
+	chunkedlen 0
+	rxresp
+	expect resp.http.etag == {"abc"}
+	expect resp.http.version == original
+	expect resp.body == can-revalidate
+
+	delay 2
+
+	# bereq.send_failed during grace
+	txreq -hdr "Transfer-Encoding: chunked"
+	chunkedlen 100
+	barrier b2 sync
+	non_fatal
+	chunkedlen 100
+	chunkedlen 0
+	fatal
+	rxresp
+	expect resp.status == 200
+	expect resp.http.etag == {"abc"}
+	expect resp.http.version == original
+	expect resp.body == can-revalidate
+} -start
+
+server s1 -wait
+barrier b2 sync
+client c2 -wait
+
+client c3 {
+	txreq -hdr "Transfer-Encoding: chunked" -hdr "ignore: bgfetch"
+	chunkedlen 100
+	chunkedlen 100
+	chunkedlen 0
+	rxresp
+	expect resp.http.etag == {"abc"}
+	expect resp.http.version == original
+	expect resp.body == can-revalidate
+} -run
+
+logexpect l1 -wait
diff --git a/doc/sphinx/reference/directors.rst b/doc/sphinx/reference/directors.rst
index 66c0f406c..30888a99e 100644
--- a/doc/sphinx/reference/directors.rst
+++ b/doc/sphinx/reference/directors.rst
@@ -195,7 +195,9 @@ statistics, it is essentially a director which state is a ``struct
 backend``. Varnish native backends currently speak HTTP/1 over TCP or
 UDS, and as such, you need to make your own custom backend if you want
 Varnish to do otherwise such as connect over UDP or speak a different
-protocol.
+protocol. A custom backend implementation must implement the ``gethdrs``
+method, and optionally ``http1pipe``. It is the responsibility of the
+custom backend to raise the ``send_failed`` flag from ``struct busyobj``.
 
 If you want to leverage probes declarations in VCL, which have the advantage of
 being reusable since they are only specifications, you can. However, you need
diff --git a/include/tbl/bo_flags.h b/include/tbl/bo_flags.h
index 4a4f1dd4b..624494d43 100644
--- a/include/tbl/bo_flags.h
+++ b/include/tbl/bo_flags.h
@@ -44,6 +44,7 @@ BO_FLAG(was_304,	0, 1, 0, 0, "")
 BO_FLAG(is_bgfetch,	1, 0, 0, 0, "")
 BO_FLAG(is_hitmiss,	1, 0, 0, 0, "")
 BO_FLAG(is_hitpass,	1, 0, 0, 0, "")
+BO_FLAG(send_failed,	0, 0, 0, 0, "")
 #undef BO_FLAG
 
 /*lint -restore */

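Likewise, an illustrative plain-C model (names made up, not Varnish API) of
the bookkeeping in the cache_fetch.c and cache_backend.c hunks above once
bo->send_failed is raised: the freshly fetched object is killed so it never
enters the cache, a stale object kept for revalidation survives, and the
connection keeps its close reason so it is never recycled.

#include <stdbool.h>

struct model_outcome {
	bool	cache_beresp;	/* insert the object into the cache? */
	bool	kill_stale_oc;	/* retire the stale object? */
	bool	recycle_conn;	/* return the connection to the pool? */
};

static struct model_outcome
model_after_fetch(bool send_failed, bool have_stale_oc)
{
	struct model_outcome o;

	o.cache_beresp = !send_failed;
	o.kill_stale_oc = have_stale_oc && !send_failed;
	o.recycle_conn = !send_failed;
	return (o);
}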
