[master] f0ee94ecf vbe: Try fetching beresp when sending bereq failed
Dridi Boukelmoune
dridi.boukelmoune at gmail.com
Tue May 31 08:02:05 UTC 2022
commit f0ee94ecf548e936ce64c0cfb484877dfb0e4b88
Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
Date: Thu Dec 16 16:56:06 2021 +0100
vbe: Try fetching beresp when sending bereq failed
There is no cancellation mechanism in the middle of an HTTP/1
transaction besides closing the session. If a backend closes
the connection before the end of the bereq delivery, it may
also send a 4XX or 5XX response that could be valuable for the
original client.
There are several things to take into consideration: it is the
responsibility of the backend implementation to raise the
bo->send_failed flag; if the backend replied something before
closing, we should not trigger a timeout; the connection must not
be recycled; and the object must not be cached.
Fixes #2332
Closes #3686
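In short, the HTTP/1 backend now raises bo->send_failed in
V1F_SendReq() only when at least the request headers went out, and
vbe_dir_gethdrs() then attempts to read a beresp anyway, without
arming the first byte timeout and while preserving the original close
reason so the connection cannot be recycled. A condensed sketch of
that flow (logging, asserts and retry handling elided; the hunks below
are the authoritative code):

    /* condensed from the vbe_dir_gethdrs() hunk below */
    i = V1F_SendReq(wrk, bo, &bo->acct.bereq_hdrbytes,
        &bo->acct.bereq_bodybytes);

    if (PFD_State(pfd) != PFD_STATE_USED) {
        if (bo->send_failed)
            /* the backend may already have replied: poll only */
            (void)VCP_Wait(wrk, pfd, VTIM_real());
        else if (VCP_Wait(wrk, pfd, VTIM_real() +
            bo->htc->first_byte_timeout) != 0)
            bo->htc->doclose = SC_RX_TIMEOUT;
    }

    sc = bo->htc->doclose;          /* remember why sending failed */
    if (i == 0 || bo->send_failed)
        i = V1F_FetchRespHdr(bo);   /* still try to fetch a beresp */

    if (bo->send_failed)
        bo->htc->doclose = sc;      /* keep close reason, never recycle */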
diff --git a/bin/varnishd/cache/cache_backend.c b/bin/varnishd/cache/cache_backend.c
index 7ac117cc4..0e328bb18 100644
--- a/bin/varnishd/cache/cache_backend.c
+++ b/bin/varnishd/cache/cache_backend.c
@@ -243,7 +243,8 @@ vbe_dir_finish(VRT_CTX, VCL_BACKEND d)
AZ(pfd);
Lck_Lock(bp->director->mtx);
} else {
- assert (PFD_State(pfd) == PFD_STATE_USED);
+ assert(PFD_State(pfd) == PFD_STATE_USED);
+ AZ(bo->send_failed);
VSLb(bo->vsl, SLT_BackendClose, "%d %s recycle", *PFD_Fd(pfd),
VRT_BACKEND_string(d));
Lck_Lock(bp->director->mtx);
@@ -268,6 +269,7 @@ vbe_dir_gethdrs(VRT_CTX, VCL_BACKEND d)
struct pfd *pfd;
struct busyobj *bo;
struct worker *wrk;
+ stream_close_t sc;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
CHECK_OBJ_NOTNULL(d, DIRECTOR_MAGIC);
@@ -275,6 +277,7 @@ vbe_dir_gethdrs(VRT_CTX, VCL_BACKEND d)
CHECK_OBJ_NOTNULL(bo, BUSYOBJ_MAGIC);
if (bo->htc != NULL)
CHECK_OBJ_NOTNULL(bo->htc->doclose, STREAM_CLOSE_MAGIC);
+ AZ(bo->send_failed);
wrk = ctx->bo->wrk;
CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
CAST_OBJ_NOTNULL(bp, d->priv, BACKEND_MAGIC);
@@ -301,8 +304,10 @@ vbe_dir_gethdrs(VRT_CTX, VCL_BACKEND d)
i = V1F_SendReq(wrk, bo, &bo->acct.bereq_hdrbytes,
&bo->acct.bereq_bodybytes);
- if (i == 0 && PFD_State(pfd) != PFD_STATE_USED) {
- if (VCP_Wait(wrk, pfd, VTIM_real() +
+ if (PFD_State(pfd) != PFD_STATE_USED) {
+ if (bo->send_failed)
+ (void)VCP_Wait(wrk, pfd, VTIM_real());
+ else if (VCP_Wait(wrk, pfd, VTIM_real() +
bo->htc->first_byte_timeout) != 0) {
bo->htc->doclose = SC_RX_TIMEOUT;
VSLb(bo->vsl, SLT_FetchError,
@@ -311,17 +316,25 @@ vbe_dir_gethdrs(VRT_CTX, VCL_BACKEND d)
}
}
- if (bo->htc->doclose == SC_NULL) {
+ if (bo->htc->doclose == SC_NULL)
assert(PFD_State(pfd) == PFD_STATE_USED);
+
+ sc = bo->htc->doclose;
+ if (i == 0 || bo->send_failed) {
+ i = V1F_FetchRespHdr(bo);
if (i == 0)
- i = V1F_FetchRespHdr(bo);
- if (i == 0) {
AN(bo->htc->priv);
- return (0);
- }
}
CHECK_OBJ_NOTNULL(bo->htc->doclose, STREAM_CLOSE_MAGIC);
+ if (bo->send_failed) {
+ assert(sc != SC_NULL);
+ bo->htc->doclose = sc;
+ }
+
+ if (i == 0)
+ return (0);
+
/*
* If we recycled a backend connection, there is a finite chance
* that the backend closed it before we got the bereq to it.
diff --git a/bin/varnishd/cache/cache_fetch.c b/bin/varnishd/cache/cache_fetch.c
index fde7e29a9..047dec969 100644
--- a/bin/varnishd/cache/cache_fetch.c
+++ b/bin/varnishd/cache/cache_fetch.c
@@ -330,6 +330,7 @@ vbf_stp_retry(struct worker *wrk, struct busyobj *bo)
bo->do_esi = 0;
bo->do_stream = 1;
bo->was_304 = 0;
+ bo->send_failed = 0;
bo->err_code = 0;
bo->err_reason = NULL;
if (bo->htc != NULL)
@@ -547,6 +548,8 @@ vbf_stp_startfetch(struct worker *wrk, struct busyobj *bo)
}
if (bo->uncacheable)
oc->flags |= OC_F_HFM;
+ if (bo->send_failed)
+ HSH_Kill(oc);
assert(wrk->handling == VCL_RET_DELIVER);
@@ -746,7 +749,7 @@ vbf_stp_fetchend(struct worker *wrk, struct busyobj *bo)
ObjSetState(wrk, oc, BOS_FINISHED);
VSLb_ts_busyobj(bo, "BerespBody", W_TIM_real(wrk));
- if (bo->stale_oc != NULL)
+ if (bo->stale_oc != NULL && !bo->send_failed)
HSH_Kill(bo->stale_oc);
return (F_STP_DONE);
}
diff --git a/bin/varnishd/http1/cache_http1_fetch.c b/bin/varnishd/http1/cache_http1_fetch.c
index 7d0e6c015..8a3245f4e 100644
--- a/bin/varnishd/http1/cache_http1_fetch.c
+++ b/bin/varnishd/http1/cache_http1_fetch.c
@@ -119,15 +119,6 @@ V1F_SendReq(struct worker *wrk, struct busyobj *bo, uint64_t *ctr_hdrbytes,
bo->no_retry = "req.body not cached";
if (bo->req->req_body_status == BS_ERROR) {
- /*
- * XXX: (#2332) We should test to see if the backend
- * XXX: sent us some headers explaining why.
- * XXX: This is hard because of the mistaken API split
- * XXX: between cache_backend.c and V1F, and therefore
- * XXX: Parked in this comment, pending renovation of
- * XXX: the VDI/backend-protocol API to allow non-H1
- * XXX: backends.
- */
assert(i < 0);
VSLb(bo->vsl, SLT_FetchError,
"req.body read error: %d (%s)",
@@ -158,10 +149,15 @@ V1F_SendReq(struct worker *wrk, struct busyobj *bo, uint64_t *ctr_hdrbytes,
errno, VAS_errtxt(errno), sc->desc);
VSLb_ts_busyobj(bo, "Bereq", W_TIM_real(wrk));
htc->doclose = sc;
+ /* NB: only raise the flag if we managed to at least send
+ * the request headers.
+ */
+ bo->send_failed = bytes >= hdrbytes;
return (-1);
}
CHECK_OBJ_NOTNULL(sc, STREAM_CLOSE_MAGIC);
VSLb_ts_busyobj(bo, "Bereq", W_TIM_real(wrk));
+ bo->send_failed = 0;
return (0);
}
@@ -171,7 +167,7 @@ V1F_FetchRespHdr(struct busyobj *bo)
struct http *hp;
int i;
- double t;
+ vtim_real t;
struct http_conn *htc;
enum htc_status_e hs;
@@ -190,7 +186,9 @@ V1F_FetchRespHdr(struct busyobj *bo)
CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
CHECK_OBJ_NOTNULL(bo->htc, HTTP_CONN_MAGIC);
- t = VTIM_real() + htc->first_byte_timeout;
+ t = VTIM_real();
+ if (!bo->send_failed)
+ t += htc->first_byte_timeout;
hs = HTC_RxStuff(htc, HTTP1_Complete, NULL, NULL,
t, NAN, htc->between_bytes_timeout, cache_param->http_resp_size);
if (hs != HTC_S_COMPLETE) {
@@ -214,6 +212,8 @@ V1F_FetchRespHdr(struct busyobj *bo)
htc->doclose = SC_RX_OVERFLOW;
break;
case HTC_S_IDLE:
+ if (bo->send_failed)
+ break;
VSLb(bo->vsl, SLT_FetchError, "first byte timeout");
htc->doclose = SC_RX_TIMEOUT;
break;
diff --git a/bin/varnishtest/tests/b00073.vtc b/bin/varnishtest/tests/b00073.vtc
index 3372ba094..3aea0207e 100644
--- a/bin/varnishtest/tests/b00073.vtc
+++ b/bin/varnishtest/tests/b00073.vtc
@@ -27,13 +27,20 @@ server s1 {
expect req.http.unset-connection == true
txresp
expect_close
+
+ accept
+ rxreq
} -start
varnish v1 -vcl+backend {
+ backend bad { .host = "${bad_backend}"; }
sub vcl_recv {
return (pass);
}
sub vcl_backend_fetch {
+ if (bereq.http.fail == "send") {
+ set bereq.backend = bad;
+ }
set bereq.http.connection = bereq.http.bereq-connection;
}
sub vcl_backend_response {
@@ -61,9 +68,19 @@ client c1 {
txreq -hdr "bereq-connection: close" -hdr "unset-connection: true"
rxresp
expect resp.status == 200
+
+ txreq -hdr "fail: fetch"
+ rxresp
+ expect resp.status == 503
} -run
server s1 -wait
+client c2 {
+ txreq -hdr "fail: send"
+ rxresp
+ expect resp.status == 503
+} -run
+
varnish v1 -expect MAIN.backend_recycle == 0
varnish v1 -expect VBE.vcl1.s1.conn == 0
diff --git a/bin/varnishtest/tests/f00001.vtc b/bin/varnishtest/tests/f00001.vtc
index 2c756c9cd..9d52b06fc 100644
--- a/bin/varnishtest/tests/f00001.vtc
+++ b/bin/varnishtest/tests/f00001.vtc
@@ -4,7 +4,7 @@ varnishtest "Check that we handle bogusly large chunks correctly"
server s1 {
rxreq
- txresp
+ txresp -status 400
} -start
varnish v1 -vcl+backend {
@@ -18,7 +18,7 @@ client c1 {
send "0\r\n\r\n"
rxresp
- expect resp.status == 503
+ expect resp.status == 400
} -run
# Check that the published workaround does not cause harm
diff --git a/bin/varnishtest/tests/r02332.vtc b/bin/varnishtest/tests/r02332.vtc
new file mode 100644
index 000000000..b76d8a3d2
--- /dev/null
+++ b/bin/varnishtest/tests/r02332.vtc
@@ -0,0 +1,99 @@
+varnishtest "Fetch beresp despite failure to send bereq"
+
+barrier b1 cond 2
+barrier b2 cond 2
+
+# bo->send_failed
+
+server s1 {
+ rxreqhdrs
+ txresp -status 400 -body "Invalid request"
+} -start
+
+varnish v1 -vcl+backend {
+ sub vcl_backend_fetch {
+ if (bereq.http.ignore == "bgfetch") {
+ return (abandon);
+ }
+ return (fetch); # don't unset bereq.body
+ }
+} -start
+
+logexpect l1 -v v1 -g raw -i BackendClose {
+ expect 0 1002 BackendClose "s1 close"
+ expect 1 1007 BackendClose "s1 close"
+} -start
+
+client c1 {
+ txreq -method POST -hdr "Transfer-Encoding: chunked"
+ chunkedlen 100
+ barrier b1 sync
+ non_fatal
+ chunkedlen 100
+ chunkedlen 0
+ fatal
+ rxresp
+ expect resp.status == 400
+ expect resp.body == "Invalid request"
+} -start
+
+server s1 -wait
+barrier b1 sync
+client c1 -wait
+
+# bo->send_failed && 304
+
+server s1 {
+ rxreq
+ txresp -hdr "Cache-Control: public, max-age=1" -hdr {Etag: "abc"} \
+ -hdr "version: original" -body can-revalidate
+
+ rxreqhdrs
+ expect req.http.if-none-match == {"abc"}
+ txresp -status 304 -hdr "Cache-Control: public" -hdr {Etag: "abc"} \
+ -hdr "version: refreshed"
+} -start
+
+client c2 {
+ txreq -hdr "Transfer-Encoding: chunked"
+ chunkedlen 100
+ chunkedlen 100
+ chunkedlen 0
+ rxresp
+ expect resp.http.etag == {"abc"}
+ expect resp.http.version == original
+ expect resp.body == can-revalidate
+
+ delay 2
+
+ # bereq.send_failed during grace
+ txreq -hdr "Transfer-Encoding: chunked"
+ chunkedlen 100
+ barrier b2 sync
+ non_fatal
+ chunkedlen 100
+ chunkedlen 0
+ fatal
+ rxresp
+ expect resp.status == 200
+ expect resp.http.etag == {"abc"}
+ expect resp.http.version == original
+ expect resp.body == can-revalidate
+} -start
+
+server s1 -wait
+barrier b2 sync
+client c2 -wait
+
+client c3 {
+ txreq -hdr "Transfer-Encoding: chunked" -hdr "ignore: bgfetch"
+ chunkedlen 100
+ chunkedlen 100
+ chunkedlen 0
+ rxresp
+ expect resp.http.etag == {"abc"}
+ expect resp.http.version == original
+ expect resp.body == can-revalidate
+} -run
+
+logexpect l1 -wait
diff --git a/doc/sphinx/reference/directors.rst b/doc/sphinx/reference/directors.rst
index 66c0f406c..30888a99e 100644
--- a/doc/sphinx/reference/directors.rst
+++ b/doc/sphinx/reference/directors.rst
@@ -195,7 +195,9 @@ statistics, it is essentially a director which state is a ``struct
backend``. Varnish native backends currently speak HTTP/1 over TCP or
UDS, and as such, you need to make your own custom backend if you want
Varnish to do otherwise such as connect over UDP or speak a different
-protocol.
+protocol. A custom backend implementation must implement the ``gethdrs``
+method, and optionally ``http1pipe``. It is the responsibility of the
+custom backend to raise the ``send_failed`` flag from ``struct busyobj``.
If you want to leverage probes declarations in VCL, which have the advantage of
being reusable since they are only specifications, you can. However, you need
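For authors of such custom backends, here is a minimal sketch of where
the flag would be raised; my_gethdrs(), my_send_bereq() and
my_fetch_beresp_hdr() are hypothetical names standing in for the
implementation's own transport, the only documented requirement being
to set bo->send_failed from the gethdrs method when sending the bereq
failed after the request headers went out:

    /* Hypothetical custom director gethdrs method (sketch only). */
    static int v_matchproto_(vdi_gethdrs_f)
    my_gethdrs(VRT_CTX, VCL_BACKEND d)
    {
        struct busyobj *bo;
        uint64_t sent, hdrbytes;
        int err;

        CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
        CHECK_OBJ_NOTNULL(ctx->bo, BUSYOBJ_MAGIC);
        bo = ctx->bo;
        (void)d;

        /* my_send_bereq() is a stand-in for the implementation's own
         * transport; it reports how many bytes went out, of which the
         * first hdrbytes are the request headers. */
        err = my_send_bereq(bo, &sent, &hdrbytes);
        if (err) {
            /* only raise the flag if at least the headers were sent,
             * mirroring what V1F_SendReq() does for native backends */
            bo->send_failed = (sent >= hdrbytes);
            if (!bo->send_failed)
                return (-1);
        } else
            bo->send_failed = 0;

        /* read and parse the backend response headers */
        return (my_fetch_beresp_hdr(bo));
    }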
diff --git a/include/tbl/bo_flags.h b/include/tbl/bo_flags.h
index 4a4f1dd4b..624494d43 100644
--- a/include/tbl/bo_flags.h
+++ b/include/tbl/bo_flags.h
@@ -44,6 +44,7 @@ BO_FLAG(was_304, 0, 1, 0, 0, "")
BO_FLAG(is_bgfetch, 1, 0, 0, 0, "")
BO_FLAG(is_hitmiss, 1, 0, 0, 0, "")
BO_FLAG(is_hitpass, 1, 0, 0, 0, "")
+BO_FLAG(send_failed, 0, 0, 0, 0, "")
#undef BO_FLAG
/*lint -restore */