[master] 4130055c4 Resolve race on waitinglist rushing between OC_F_BUSY and boc->state

Martin Blix Grydeland martin at varnish-software.com
Fri Apr 26 08:51:08 UTC 2019


commit 4130055c457903e7f904fa56c0db58d7e7467dc1
Author: Martin Blix Grydeland <martin at varnish-software.com>
Date:   Thu Apr 25 13:39:59 2019 +0200

    Resolve race on waitinglist rushing between OC_F_BUSY and boc->state
    
    When an object is ready for delivery, HSH_Unbusy was called before calling
    ObjSetState([BOS_STREAM|BOS_FINISHED]). The HSH_Unbusy() call does the
    waitinglist rushing, but HSH_Lookup() wanted to look at the boc->state and
    if BOS_STREAM had been reached. This could cause requests woken to find
    that the stream state still hadn't been reached (ObjSetState still hadn't
    executed), and go back on the waitinglist.
    
    To fix this, this patch reverts commit
    0375791cac1f2333ab54932ba1d2025261082fab, and goes back to considering
    OC_F_BUSY as the gate keeper for HSH_Lookup. This eliminates the race,
    because HSH_Unbusy and HSH_Lookup then uses the same mutex.
    
    That change opens up the possiblity that req code after HSH_Lookup() sees
    an object that has not yet reached BOS_STREAM. In order to not have to add
    new ObjWaitState() calls (with the additional locking cost that would
    bring) to wait for BOS_STREAM, the order of events is changed throughout,
    and calls ObjSetState([BOS_STREAM|BOS_FINISHED]) before HSH_Unbusy(). That
    way, an object returned from HSH_Lookup() is guaranteed to be at least
    BOS_STREAM.

diff --git a/bin/varnishd/cache/cache_fetch.c b/bin/varnishd/cache/cache_fetch.c
index b3304bedb..63738e1fe 100644
--- a/bin/varnishd/cache/cache_fetch.c
+++ b/bin/varnishd/cache/cache_fetch.c
@@ -562,9 +562,8 @@ vbf_stp_fetch(struct worker *wrk, struct busyobj *bo)
 	assert(bo->fetch_objcore->boc->state == BOS_REQ_DONE);
 
 	if (bo->do_stream) {
-		ObjSetState(wrk, bo->fetch_objcore, BOS_PREP_STREAM);
-		HSH_Unbusy(wrk, bo->fetch_objcore);
 		ObjSetState(wrk, bo->fetch_objcore, BOS_STREAM);
+		HSH_Unbusy(wrk, bo->fetch_objcore);
 	}
 
 	VSLb(bo->vsl, SLT_Fetch_Body, "%u %s %s",
@@ -589,18 +588,20 @@ vbf_stp_fetchend(struct worker *wrk, struct busyobj *bo)
 	AZ(ObjSetU64(wrk, bo->fetch_objcore, OA_LEN,
 	    bo->fetch_objcore->boc->len_so_far));
 
-	if (bo->do_stream)
-		assert(bo->fetch_objcore->boc->state == BOS_STREAM);
-	else {
-		assert(bo->fetch_objcore->boc->state == BOS_REQ_DONE);
-		HSH_Unbusy(wrk, bo->fetch_objcore);
-	}
-
 	/* Recycle the backend connection before setting BOS_FINISHED to
 	   give predictable backend reuse behavior for varnishtest */
 	VDI_Finish(bo);
 
+	if (bo->do_stream)
+		assert(bo->fetch_objcore->boc->state == BOS_STREAM);
+	else
+		assert(bo->fetch_objcore->boc->state == BOS_REQ_DONE);
 	ObjSetState(wrk, bo->fetch_objcore, BOS_FINISHED);
+
+	if (!bo->do_stream)
+		HSH_Unbusy(wrk, bo->fetch_objcore);
+	AZ(bo->fetch_objcore->flags & OC_F_BUSY);
+
 	VSLb_ts_busyobj(bo, "BerespBody", W_TIM_real(wrk));
 	if (bo->stale_oc != NULL)
 		HSH_Kill(bo->stale_oc);
@@ -654,9 +655,8 @@ vbf_stp_condfetch(struct worker *wrk, struct busyobj *bo)
 	AZ(ObjCopyAttr(bo->wrk, bo->fetch_objcore, bo->stale_oc, OA_GZIPBITS));
 
 	if (bo->do_stream) {
-		ObjSetState(wrk, bo->fetch_objcore, BOS_PREP_STREAM);
-		HSH_Unbusy(wrk, bo->fetch_objcore);
 		ObjSetState(wrk, bo->fetch_objcore, BOS_STREAM);
+		HSH_Unbusy(wrk, bo->fetch_objcore);
 	}
 
 	if (ObjIterate(wrk, bo->stale_oc, bo, vbf_objiterator, 0))
@@ -790,10 +790,10 @@ vbf_stp_error(struct worker *wrk, struct busyobj *bo)
 	}
 	AZ(ObjSetU64(wrk, bo->fetch_objcore, OA_LEN, o));
 	VSB_destroy(&synth_body);
+	ObjSetState(wrk, bo->fetch_objcore, BOS_FINISHED);
 	HSH_Unbusy(wrk, bo->fetch_objcore);
 	if (stale != NULL && bo->fetch_objcore->ttl > 0)
 		HSH_Kill(stale);
-	ObjSetState(wrk, bo->fetch_objcore, BOS_FINISHED);
 	return (F_STP_DONE);
 }
 
@@ -808,10 +808,10 @@ vbf_stp_fail(struct worker *wrk, const struct busyobj *bo)
 	CHECK_OBJ_NOTNULL(bo->fetch_objcore, OBJCORE_MAGIC);
 
 	assert(bo->fetch_objcore->boc->state < BOS_FINISHED);
+	ObjSetState(wrk, bo->fetch_objcore, BOS_FAILED);
 	HSH_Fail(bo->fetch_objcore);
 	if (!(bo->fetch_objcore->flags & OC_F_BUSY))
 		HSH_Kill(bo->fetch_objcore);
-	ObjSetState(wrk, bo->fetch_objcore, BOS_FAILED);
 	return (F_STP_DONE);
 }
 
@@ -982,8 +982,6 @@ VBF_Fetch(struct worker *wrk, struct req *req, struct objcore *oc,
 			ObjWaitState(oc, BOS_STREAM);
 			if (oc->boc->state == BOS_FAILED) {
 				AN((oc->flags & OC_F_FAILED));
-			} else {
-				AZ(oc->flags & OC_F_BUSY);
 			}
 		}
 	}
diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c
index ead55932c..66167df33 100644
--- a/bin/varnishd/cache/cache_hash.c
+++ b/bin/varnishd/cache/cache_hash.c
@@ -420,11 +420,11 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
 			continue;
 
 		CHECK_OBJ_ORNULL(oc->boc, BOC_MAGIC);
-		if (oc->boc != NULL && oc->boc->state < BOS_STREAM) {
+		if (oc->flags & OC_F_BUSY) {
 			if (req->hash_ignore_busy)
 				continue;
 
-			if (oc->boc->vary != NULL &&
+			if (oc->boc != NULL && oc->boc->vary != NULL &&
 			    !VRY_Match(req, oc->boc->vary))
 				continue;
 
diff --git a/bin/varnishd/cache/cache_obj.c b/bin/varnishd/cache/cache_obj.c
index b533eefcf..89bb0497d 100644
--- a/bin/varnishd/cache/cache_obj.c
+++ b/bin/varnishd/cache/cache_obj.c
@@ -267,7 +267,6 @@ ObjSetState(struct worker *wrk, const struct objcore *oc,
 	assert(next > oc->boc->state);
 
 	CHECK_OBJ_ORNULL(oc->stobj->stevedore, STEVEDORE_MAGIC);
-	assert(next != BOS_STREAM || oc->boc->state == BOS_PREP_STREAM);
 	assert(next != BOS_FINISHED || (oc->oa_present & (1 << OA_LEN)));
 
 	if (oc->stobj->stevedore != NULL) {
diff --git a/include/tbl/boc_state.h b/include/tbl/boc_state.h
index a5c4a61ff..751456ca6 100644
--- a/include/tbl/boc_state.h
+++ b/include/tbl/boc_state.h
@@ -30,7 +30,6 @@
 
 BOC_STATE(INVALID,	invalid)	/* don't touch (yet) */
 BOC_STATE(REQ_DONE,	req_done)	/* bereq.* can be examined */
-BOC_STATE(PREP_STREAM,	prep_stream)	/* Prepare for streaming */
 BOC_STATE(STREAM,	stream)		/* beresp.* can be examined */
 BOC_STATE(FINISHED,	finished)	/* object is complete */
 BOC_STATE(FAILED,	failed)		/* something went wrong */


More information about the varnish-commit mailing list