[master] 59367a6ce Always reschedule requests from the waiting list.

Tue Aug 28 20:28:08 UTC 2018

commit 59367a6ce989f3b3cf237a135ade4a94641fbfc5
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date:   Tue Aug 28 20:27:37 2018 +0000

    Always reschedule requests from the waiting list.
    
    Previously we used various heuristics (test TCP connection) to avoid
    the reschedule if the req was abandonned while on the waiting list.
    
    We also subjected the rescheduling to pool-queue limits like any
    new request.
    
    This was all safe because we knew we could clean up the request
    state cheaply, even if it was somewhat cumbersome.
    
    Now vmods can have per-task PRIV's and we have no idea what it will
    cost us (stack, time, etc) to clean them up, so we cannot burden
    J.Random Request who happens to rush the waiting list with the burden.
    
    Fix this by always rescheduling, not subject to pool-queue limits,
    and eliminate all the special-casing for exceeded limits, including
    the debug feature to force a rescheduling failure and two tests
    exercising it.
    
    As a side effect of this, requests on the waiting list gets a
    "business class upgrade" over newly arriving requests when there
    are no worker threads available.  Given that these requests arrived
    earlier, and we already performed work on them, this seems only fair.
    
    Forced to pay proper attention by:      slink

diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h
index c5c5cb9b0..e8dc2f3de 100644
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@ -217,10 +217,17 @@ struct pool_task {
  * tasks are taken off the queues in this order
  *
  * prios up to TASK_QUEUE_RESERVE are run from the reserve
+ *
+ * TASK_QUEUE_{REQ|STR} are new req's (H1/H2), and subject to queue limit.
+ *
+ * TASK_QUEUE_RUSH is req's returning from waiting list, they are
+ * not subject to TASK_QUEUE_CLIENT because we cannot safely clean
+ * them up if scheduling them fails.
  */
 enum task_prio {
 	TASK_QUEUE_BO,
 #define TASK_QUEUE_RESERVE	TASK_QUEUE_BO
+	TASK_QUEUE_RUSH,
 	TASK_QUEUE_REQ,
 	TASK_QUEUE_STR,
 	TASK_QUEUE_VCA,
diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c
index 3fbbc702d..c3b652bbe 100644
--- a/bin/varnishd/cache/cache_hash.c
+++ b/bin/varnishd/cache/cache_hash.c
@@ -76,6 +76,7 @@ static struct objhead *private_oh;
 static void hsh_rush1(const struct worker *, struct objhead *,
     struct rush *, int);
 static void hsh_rush2(struct worker *, struct rush *);
+static int HSH_DerefObjHead(struct worker *wrk, struct objhead **poh);
 
 /*---------------------------------------------------------------------*/
 
@@ -591,8 +592,18 @@ hsh_rush2(struct worker *wrk, struct rush *r)
 		CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
 		VTAILQ_REMOVE(&r->reqs, req, w_list);
 		DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list");
-		AN(req->transport->reembark);
-		req->transport->reembark(wrk, req);
+		if (req->transport->reembark != NULL) {
+			// For ESI includes
+			req->transport->reembark(wrk, req);
+		} else {
+			/*
+			 * We ignore the queue limits which apply to new
+			 * requests because if we fail to reschedule there
+			 * may be vmod_privs to cleanup and we need a proper
+			 * workerthread for that.
+			 */
+			AZ(Pool_Task(req->sp->pool, &req->task, TASK_QUEUE_RUSH));
+		}
 	}
 }
 
@@ -939,7 +950,7 @@ HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax)
 	return (0);
 }
 
-int
+static int
 HSH_DerefObjHead(struct worker *wrk, struct objhead **poh)
 {
 	struct objhead *oh;
diff --git a/bin/varnishd/cache/cache_objhead.h b/bin/varnishd/cache/cache_objhead.h
index 6bcfbd9b9..6bcb7b301 100644
--- a/bin/varnishd/cache/cache_objhead.h
+++ b/bin/varnishd/cache/cache_objhead.h
@@ -63,7 +63,6 @@ int HSH_Snipe(const struct worker *, struct objcore *);
 struct boc *HSH_RefBoc(const struct objcore *);
 void HSH_DerefBoc(struct worker *wrk, struct objcore *);
 void HSH_DeleteObjHead(const struct worker *, struct objhead *);
-int HSH_DerefObjHead(struct worker *, struct objhead **);
 
 int HSH_DerefObjCore(struct worker *, struct objcore **, int rushmax);
 #define HSH_RUSH_POLICY -1
diff --git a/bin/varnishd/cache/cache_req_fsm.c b/bin/varnishd/cache/cache_req_fsm.c
index a799a9f9a..deecc3434 100644
--- a/bin/varnishd/cache/cache_req_fsm.c
+++ b/bin/varnishd/cache/cache_req_fsm.c
@@ -50,34 +50,6 @@
 #include "vsha256.h"
 #include "vtim.h"
 
-/*--------------------------------------------------------------------
- * Reschedule a request from the waiting list
- */
-
-int
-CNT_Reembark(struct worker *wrk, struct req *req)
-{
-
-	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
-
-	if (!DO_DEBUG(DBG_FAILRESCHED) &&
-	    !SES_Reschedule_Req(req, TASK_QUEUE_REQ))
-		return (0);
-
-	/* Couldn't schedule, ditch */
-	wrk->stats->busy_wakeup--;
-	wrk->stats->busy_killed++;
-	VSLb(req->vsl, SLT_Error, "Fail to reschedule req from waiting list");
-
-	AN(req->ws->r);
-	WS_Release(req->ws, 0);
-	AN(req->hash_objhead);
-	(void)HSH_DerefObjHead(wrk, &req->hash_objhead);
-	AZ(req->hash_objhead);
-	return(-1);
-}
-
 /*--------------------------------------------------------------------
  * Handle "Expect:" and "Connection:" on incoming request
  */
diff --git a/bin/varnishd/cache/cache_session.c b/bin/varnishd/cache/cache_session.c
index 0500bcef2..20ada52e3 100644
--- a/bin/varnishd/cache/cache_session.c
+++ b/bin/varnishd/cache/cache_session.c
@@ -376,30 +376,6 @@ SES_New(struct pool *pp)
 	return (sp);
 }
 
-/*--------------------------------------------------------------------
- * Reschedule a request on a work-thread from its sessions pool
- *
- * This is used to reschedule requests waiting on busy objects
- */
-
-int
-SES_Reschedule_Req(struct req *req, enum task_prio prio)
-{
-	struct sess *sp;
-	struct pool *pp;
-
-	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
-	sp = req->sp;
-	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
-	pp = sp->pool;
-	CHECK_OBJ_NOTNULL(pp, POOL_MAGIC);
-	AN(TASK_QUEUE_CLIENT(prio));
-
-	AN(req->task.func);
-
-	return (Pool_Task(pp, &req->task, prio));
-}
-
 /*--------------------------------------------------------------------
  * Handle a session (from waiter)
  */
diff --git a/bin/varnishd/cache/cache_varnishd.h b/bin/varnishd/cache/cache_varnishd.h
index de45a0fc0..52a7e9622 100644
--- a/bin/varnishd/cache/cache_varnishd.h
+++ b/bin/varnishd/cache/cache_varnishd.h
@@ -335,7 +335,6 @@ enum req_fsm_nxt {
 };
 
 enum req_fsm_nxt CNT_Request(struct worker *, struct req *);
-int CNT_Reembark(struct worker *, struct req *);
 
 /* cache_session.c */
 void SES_NewPool(struct pool *, unsigned pool_no);
@@ -343,7 +342,6 @@ void SES_DestroyPool(struct pool *);
 void SES_Wait(struct sess *, const struct transport *);
 void SES_Ref(struct sess *sp);
 void SES_Rel(struct sess *sp);
-int SES_Reschedule_Req(struct req *, enum task_prio);
 
 const char * HTC_Status(enum htc_status_e);
 void HTC_RxInit(struct http_conn *htc, struct ws *ws);
diff --git a/bin/varnishd/http1/cache_http1_fsm.c b/bin/varnishd/http1/cache_http1_fsm.c
index ed3c9c8f4..30bd3b894 100644
--- a/bin/varnishd/http1/cache_http1_fsm.c
+++ b/bin/varnishd/http1/cache_http1_fsm.c
@@ -201,25 +201,6 @@ http1_req_cleanup(struct sess *sp, struct worker *wrk, struct req *req)
 	return (0);
 }
 
-/*----------------------------------------------------------------------
- * Clean up a req from waiting list which cannot complete
- */
-
-static void v_matchproto_(vtr_reembark_f)
-http1_reembark(struct worker *wrk, struct req *req)
-{
-
-	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
-	assert(req->transport == &HTTP1_transport);
-
-	if (!CNT_Reembark(wrk, req))
-		return;
-
-	SES_Close(req->sp, SC_OVERLOAD);
-	AN(http1_req_cleanup(req->sp, wrk, req));
-}
-
 static int v_matchproto_(vtr_minimal_response_f)
 http1_minimal_response(struct req *req, uint16_t status)
 {
@@ -263,7 +244,6 @@ struct transport HTTP1_transport = {
 	.deliver =		V1D_Deliver,
 	.minimal_response =	http1_minimal_response,
 	.new_session =		http1_new_session,
-	.reembark =		http1_reembark,
 	.req_body =		http1_req_body,
 	.req_fail =		http1_req_fail,
 	.req_panic =		http1_req_panic,
diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c
index 8788dc45a..5985fe472 100644
--- a/bin/varnishd/http2/cache_http2_session.c
+++ b/bin/varnishd/http2/cache_http2_session.c
@@ -434,32 +434,12 @@ h2_new_session(struct worker *wrk, void *arg)
 	h2_del_sess(wrk, h2, SC_RX_JUNK);
 }
 
-static void v_matchproto_(vtr_reembark_f)
-h2_reembark(struct worker *wrk, struct req *req)
-{
-	struct h2_req *r2;
-
-	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
-	assert(req->transport == &H2_transport);
-
-	if (!CNT_Reembark(wrk, req))
-		return;
-
-	CAST_OBJ_NOTNULL(r2, req->transport_priv, H2_REQ_MAGIC);
-	assert(r2->state == H2_S_CLOS_REM);
-	AN(r2->scheduled);
-	r2->scheduled = 0;
-	r2->h2sess->do_sweep = 1;
-}
-
 struct transport H2_transport = {
 	.name =			"H2",
 	.magic =		TRANSPORT_MAGIC,
 	.deliver =		h2_deliver,
 	.minimal_response =	h2_minimal_response,
 	.new_session =		h2_new_session,
-	.reembark =		h2_reembark,
 	.req_body =		h2_req_body,
 	.req_fail =		h2_req_fail,
 	.sess_panic =		h2_sess_panic,
diff --git a/bin/varnishtest/tests/c00013.vtc b/bin/varnishtest/tests/c00013.vtc
index 1fcccac4a..ef4cc76f8 100644
--- a/bin/varnishtest/tests/c00013.vtc
+++ b/bin/varnishtest/tests/c00013.vtc
@@ -50,60 +50,3 @@ varnish v1 -vsl_catchup
 varnish v1 -expect busy_sleep >= 1
 varnish v1 -expect busy_wakeup >= 1
 varnish v1 -stop
-
-##################################################
-# Now try again where getting a thread fails
-
-barrier b3 cond 2
-barrier b4 cond 2
-
-server s3 {
-	rxreq
-	expect req.url == "/foo"
-	send "HTTP/1.0 200 OK\r\nConnection: close\r\n\r\n"
-	delay .2
-	barrier b3 sync
-	delay .2
-	send "line1\n"
-	delay .2
-	barrier b4 sync
-	send "line2\n"
-} -start
-
-varnish v3 -vcl+backend {
-	sub vcl_backend_fetch {
-		set bereq.backend = s3;
-	}
-	sub vcl_backend_response {
-		set beresp.do_stream = false;
-	}
-} -start
-
-varnish v3 -cliok "param.set debug +failresched"
-
-varnish v3 -cliok "param.set debug +syncvsl"
-
-client c3 -connect ${v3_sock} {
-	txreq -url "/foo" -hdr "client: c3"
-	rxresp
-	expect resp.status == 200
-	expect resp.bodylen == 12
-	expect resp.http.x-varnish == "1001"
-} -start
-
-barrier b3 sync
-
-client c4 -connect ${v3_sock} {
-	txreq -url "/foo" -hdr "client: c4"
-	delay .2
-	barrier b4 sync
-	expect_close
-} -run
-
-client c3 -wait
-
-varnish v1 -vsl_catchup
-varnish v3 -expect busy_sleep >= 1
-varnish v3 -expect busy_wakeup == 0
-varnish v3 -expect busy_killed == 1
-varnish v3 -expect sc_overload == 1
diff --git a/bin/varnishtest/tests/r02563.vtc b/bin/varnishtest/tests/r02563.vtc
deleted file mode 100644
index bfe3514b9..000000000
--- a/bin/varnishtest/tests/r02563.vtc
+++ /dev/null
@@ -1,64 +0,0 @@
-varnishtest "#2563: Panic after reembark failure"
-
-barrier b1 cond 2
-barrier b2 cond 2
-
-server s1 {
-	rxreq
-	expect req.url == "/foo"
-	expect req.http.client == "c1"
-	send "HTTP/1.0 200 OK\r\nConnection: close\r\n\r\n"
-	delay .2
-	barrier b1 sync
-	delay .2
-	send "line1\n"
-	delay .2
-	barrier b2 sync
-	send "line2\n"
-} -start
-
-varnish v1 -vcl+backend {
-	sub vcl_backend_response {
-		set beresp.do_stream = false;
-	}
-} -start
-
-varnish v1 -cliok "param.set feature +http2"
-varnish v1 -cliok "param.set debug +failresched"
-varnish v1 -cliok "param.set debug +waitinglist"
-varnish v1 -cliok "param.set debug +syncvsl"
-
-client c1 {
-	txreq -url "/foo" -hdr "client: c1"
-	rxresp
-	expect resp.status == 200
-	expect resp.bodylen == 12
-	expect resp.http.x-varnish == "1001"
-} -start
-
-barrier b1 sync
-
-client c2 {
-	stream 1 {
-		txreq -url "/foo"
-		delay .2
-		barrier b2 sync
-		rxrst
-		expect rst.err == REFUSED_STREAM
-	} -start
-
-	stream 3 {
-		delay 1
-		txreq -url "/foo"
-		rxresp
-	} -run
-
-	stream 1 -wait
-} -run
-
-client c1 -wait
-
-varnish v1 -vsl_catchup
-varnish v1 -expect busy_sleep >= 1
-varnish v1 -expect busy_wakeup == 0
-varnish v1 -expect busy_killed == 1
diff --git a/include/tbl/debug_bits.h b/include/tbl/debug_bits.h
index b451f3332..4ac3ef60f 100644
--- a/include/tbl/debug_bits.h
+++ b/include/tbl/debug_bits.h
@@ -50,7 +50,6 @@ DEBUG_BIT(H2_NOCHECK,		h2_nocheck,	"Disable various H2 checks")
 DEBUG_BIT(VMOD_SO_KEEP,		vmod_so_keep,	"Keep copied VMOD libraries")
 DEBUG_BIT(PROCESSORS,		processors,	"Fetch/Deliver processors")
 DEBUG_BIT(PROTOCOL,		protocol,	"Protocol debugging")
-DEBUG_BIT(FAILRESCHED,		failresched,	"Fail from waiting list")
 #undef DEBUG_BIT
 
 /*lint -restore */