[master] 3d723fd Rework the busyobj handling.

Martin Blix Grydeland martin at varnish-cache.org
Tue Dec 6 11:20:12 CET 2011


commit 3d723fdab10f10c7f062e8350e1b81a341b28a21
Author: Martin Blix Grydeland <martin at varnish-software.com>
Date:   Tue Dec 6 10:37:40 2011 +0100

    Rework the busyobj handling.
    
    This patch reworks busyobj handling so that busyobjs are owned by the issuing worker, and the worker should explicitly release it when done with it. A busy objcore only lends a pointer to it. This is to fix the current situation where the owning party is murky, and the current code will leak busyobjs in at least one situation.
    
    BusyObj's are reference counted in preperation for the streaming code.
    
    Patch also adds several asserts to help make sure the busyobjs are sound.
    
    Fixes: #1068

diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h
index 327682c..347de56 100644
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@ -293,7 +293,7 @@ struct worker {
 	struct objhead		*nobjhead;
 	struct objcore		*nobjcore;
 	struct waitinglist	*nwaitinglist;
-	struct busyobj		*nbusyobj;
+	/* struct busyobj		*nbusyobj; */
 	void			*nhashpriv;
 	struct dstat		stats;
 
@@ -496,6 +496,8 @@ oc_getlru(const struct objcore *oc)
 struct busyobj {
 	unsigned		magic;
 #define BUSYOBJ_MAGIC		0x23b95567
+	unsigned		refcount;
+
 	uint8_t			*vary;
 	unsigned		is_gzip;
 	unsigned		is_gunzip;
@@ -667,8 +669,14 @@ void VDI_RecycleFd(struct worker *wrk, struct vbc **vbp);
 void VDI_AddHostHeader(struct worker *wrk, const struct vbc *vbc);
 void VBE_Poll(void);
 
-/* cache_backend_cfg.c */
+/* cache_backend.c */
 void VBE_Init(void);
+struct busyobj *VBE_GetBusyObj(void);
+struct busyobj *VBE_RefBusyObj(struct busyobj *busyobj);
+void VBE_DerefBusyObj(struct busyobj **busyobj);
+
+/* cache_backend_cfg.c */
+void VBE_InitCfg(void);
 struct backend *VBE_AddBackend(struct cli *cli, const struct vrt_backend *vb);
 
 /* cache_backend_poll.c */
@@ -995,21 +1003,6 @@ void SMP_Init(void);
 void SMP_Ready(void);
 void SMP_NewBan(const uint8_t *ban, unsigned len);
 
-#define New_BusyObj(wrk)						\
-	do {								\
-		if (wrk->nbusyobj != NULL) {				\
-			CHECK_OBJ_NOTNULL(wrk->nbusyobj, BUSYOBJ_MAGIC);\
-			wrk->busyobj = wrk->nbusyobj;			\
-			wrk->nbusyobj = NULL;				\
-			memset(wrk->busyobj, 0, sizeof *wrk->busyobj);	\
-			wrk->busyobj->magic = BUSYOBJ_MAGIC;		\
-		} else {						\
-			ALLOC_OBJ(wrk->busyobj, BUSYOBJ_MAGIC);		\
-		}							\
-		CHECK_OBJ_NOTNULL(wrk->busyobj, BUSYOBJ_MAGIC);		\
-		AZ(wrk->nbusyobj);					\
-	} while (0)
-
 /*
  * A normal pointer difference is signed, but we never want a negative value
  * so this little tool will make sure we don't get that.
@@ -1064,6 +1057,7 @@ AssertObjBusy(const struct object *o)
 {
 	AN(o->objcore);
 	AN (o->objcore->flags & OC_F_BUSY);
+	AN(o->objcore->busyobj);
 }
 
 static inline void
diff --git a/bin/varnishd/cache/cache_backend.c b/bin/varnishd/cache/cache_backend.c
index b661640..263aa17 100644
--- a/bin/varnishd/cache/cache_backend.c
+++ b/bin/varnishd/cache/cache_backend.c
@@ -41,6 +41,91 @@
 #include "vrt.h"
 #include "vtcp.h"
 
+static struct lock nbusyobj_mtx;
+static struct busyobj *nbusyobj;
+
+void
+VBE_Init(void)
+{
+	Lck_New(&nbusyobj_mtx, lck_nbusyobj);
+	nbusyobj = NULL;
+}
+
+/*--------------------------------------------------------------------
+ * BusyObj handling
+ */
+
+static struct busyobj *
+vbe_NewBusyObj(void)
+{
+	struct busyobj *busyobj;
+
+	ALLOC_OBJ(busyobj, BUSYOBJ_MAGIC);
+	AN(busyobj);
+	return (busyobj);
+}
+
+static void
+vbe_FreeBusyObj(struct busyobj *busyobj)
+{
+	CHECK_OBJ_NOTNULL(busyobj, BUSYOBJ_MAGIC);
+	AZ(busyobj->refcount);
+	FREE_OBJ(busyobj);
+}
+
+struct busyobj *
+VBE_GetBusyObj(void)
+{
+	struct busyobj *busyobj = NULL;
+
+	Lck_Lock(&nbusyobj_mtx);
+	if (nbusyobj != NULL) {
+		CHECK_OBJ_NOTNULL(nbusyobj, BUSYOBJ_MAGIC);
+		busyobj = nbusyobj;
+		nbusyobj = NULL;
+		memset(busyobj, 0, sizeof *busyobj);
+		busyobj->magic = BUSYOBJ_MAGIC;
+	}
+	Lck_Unlock(&nbusyobj_mtx);
+	if (busyobj == NULL)
+		busyobj = vbe_NewBusyObj();
+	AN(busyobj);
+	busyobj->refcount = 1;
+	return (busyobj);
+}
+
+struct busyobj *
+VBE_RefBusyObj(struct busyobj *busyobj)
+{
+	CHECK_OBJ_NOTNULL(busyobj, BUSYOBJ_MAGIC);
+	assert(busyobj->refcount > 0);
+	busyobj->refcount++;
+	return (busyobj);
+}
+
+void
+VBE_DerefBusyObj(struct busyobj **pbo)
+{
+	struct busyobj *busyobj;
+
+	busyobj = *pbo;
+	CHECK_OBJ_NOTNULL(busyobj, BUSYOBJ_MAGIC);
+	assert(busyobj->refcount > 0);
+	busyobj->refcount--;
+	*pbo = NULL;
+	if (busyobj->refcount > 0)
+		return;
+	/* XXX Sanity checks e.g. AZ(busyobj->vbc) */
+	Lck_Lock(&nbusyobj_mtx);
+	if (nbusyobj == NULL) {
+		nbusyobj = busyobj;
+		busyobj = NULL;
+	}
+	Lck_Unlock(&nbusyobj_mtx);
+	if (busyobj != NULL)
+		vbe_FreeBusyObj(busyobj);
+}
+
 /*--------------------------------------------------------------------
  * The "simple" director really isn't, since thats where all the actual
  * connections happen.  Nontheless, pretend it is simple by sequestering
diff --git a/bin/varnishd/cache/cache_backend_cfg.c b/bin/varnishd/cache/cache_backend_cfg.c
index cbb8c85..a47aa3c 100644
--- a/bin/varnishd/cache/cache_backend_cfg.c
+++ b/bin/varnishd/cache/cache_backend_cfg.c
@@ -499,7 +499,7 @@ static struct cli_proto backend_cmds[] = {
 /*---------------------------------------------------------------------*/
 
 void
-VBE_Init(void)
+VBE_InitCfg(void)
 {
 
 	Lck_New(&VBE_mtx, lck_vbe);
diff --git a/bin/varnishd/cache/cache_center.c b/bin/varnishd/cache/cache_center.c
index 079df44..995268a 100644
--- a/bin/varnishd/cache/cache_center.c
+++ b/bin/varnishd/cache/cache_center.c
@@ -179,8 +179,11 @@ cnt_prepresp(struct sess *sp)
 	CHECK_OBJ_NOTNULL(wrk->obj, OBJECT_MAGIC);
 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
 
-	if (wrk->busyobj != NULL && wrk->busyobj->do_stream)
+	if (wrk->busyobj != NULL) {
+		CHECK_OBJ_NOTNULL(wrk->busyobj, BUSYOBJ_MAGIC);
+		AN(wrk->busyobj->do_stream);
 		AssertObjCorePassOrBusy(wrk->obj->objcore);
+	}
 
 	wrk->res_mode = 0;
 
@@ -248,9 +251,11 @@ cnt_prepresp(struct sess *sp)
 	case VCL_RET_RESTART:
 		if (sp->restarts >= cache_param->max_restarts)
 			break;
-		if (wrk->busyobj->do_stream) {
+		if (wrk->busyobj != NULL) {
+			AN(wrk->busyobj->do_stream);
 			VDI_CloseFd(wrk, &wrk->busyobj->vbc);
 			HSH_Drop(wrk);
+			VBE_DerefBusyObj(&wrk->busyobj);
 		} else {
 			(void)HSH_Deref(wrk, NULL, &wrk->obj);
 		}
@@ -298,6 +303,7 @@ cnt_deliver(struct sess *sp)
 	wrk = sp->wrk;
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 
+	AZ(sp->wrk->busyobj);
 	sp->director = NULL;
 	sp->restarts = 0;
 
@@ -338,6 +344,7 @@ cnt_done(struct sess *sp)
 	CHECK_OBJ_ORNULL(sp->vcl, VCL_CONF_MAGIC);
 
 	AZ(wrk->obj);
+	AZ(wrk->busyobj);
 	sp->director = NULL;
 	sp->restarts = 0;
 
@@ -458,7 +465,8 @@ cnt_error(struct sess *sp)
 
 	if (wrk->obj == NULL) {
 		HSH_Prealloc(sp);
-		New_BusyObj(wrk);
+		AZ(wrk->busyobj);
+		wrk->busyobj = VBE_GetBusyObj();
 		wrk->obj = STV_NewObject(wrk, NULL, cache_param->http_resp_size,
 		     (uint16_t)cache_param->http_max_hdr);
 		if (wrk->obj == NULL)
@@ -477,6 +485,7 @@ cnt_error(struct sess *sp)
 		wrk->obj->xid = sp->xid;
 		wrk->obj->exp.entered = sp->t_req;
 	} else {
+		CHECK_OBJ_NOTNULL(wrk->busyobj, BUSYOBJ_MAGIC);
 		/* XXX: Null the headers ? */
 	}
 	CHECK_OBJ_NOTNULL(wrk->obj, OBJECT_MAGIC);
@@ -501,6 +510,7 @@ cnt_error(struct sess *sp)
 	if (sp->handling == VCL_RET_RESTART &&
 	    sp->restarts <  cache_param->max_restarts) {
 		HSH_Drop(wrk);
+		VBE_DerefBusyObj(&wrk->busyobj);
 		sp->director = NULL;
 		sp->restarts++;
 		sp->step = STP_RECV;
@@ -517,6 +527,7 @@ cnt_error(struct sess *sp)
 	sp->err_code = 0;
 	sp->err_reason = NULL;
 	http_Setup(wrk->bereq, NULL);
+	VBE_DerefBusyObj(&wrk->busyobj);
 	sp->step = STP_PREPRESP;
 	return (0);
 }
@@ -645,6 +656,7 @@ cnt_fetch(struct sess *sp)
 		AZ(HSH_Deref(wrk, wrk->objcore, NULL));
 		wrk->objcore = NULL;
 	}
+	VBE_DerefBusyObj(&wrk->busyobj);
 	http_Setup(wrk->bereq, NULL);
 	http_Setup(wrk->beresp, NULL);
 	sp->director = NULL;
@@ -819,6 +831,7 @@ cnt_fetchbody(struct sess *sp)
 		sp->err_code = 503;
 		sp->step = STP_ERROR;
 		VDI_CloseFd(wrk, &wrk->busyobj->vbc);
+		VBE_DerefBusyObj(&wrk->busyobj);
 		return (0);
 	}
 	CHECK_OBJ_NOTNULL(wrk->obj, OBJECT_MAGIC);
@@ -887,6 +900,7 @@ cnt_fetchbody(struct sess *sp)
 
 	if (i) {
 		HSH_Drop(wrk);
+		VBE_DerefBusyObj(&wrk->busyobj);
 		AZ(wrk->obj);
 		sp->err_code = 503;
 		sp->step = STP_ERROR;
@@ -899,6 +913,7 @@ cnt_fetchbody(struct sess *sp)
 		AN(wrk->obj->objcore->ban);
 		HSH_Unbusy(wrk);
 	}
+	VBE_DerefBusyObj(&wrk->busyobj);
 	wrk->acct_tmp.fetch++;
 	sp->step = STP_PREPRESP;
 	return (0);
@@ -972,6 +987,7 @@ cnt_streambody(struct sess *sp)
 	assert(WRW_IsReleased(wrk));
 	assert(wrk->wrw.ciov == wrk->wrw.siov);
 	(void)HSH_Deref(wrk, NULL, &wrk->obj);
+	VBE_DerefBusyObj(&wrk->busyobj);
 	http_Setup(wrk->resp, NULL);
 	sp->step = STP_DONE;
 	return (0);
@@ -1050,6 +1066,7 @@ cnt_hit(struct sess *sp)
 
 	CHECK_OBJ_NOTNULL(wrk->obj, OBJECT_MAGIC);
 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+	AZ(wrk->busyobj);
 
 	assert(!(wrk->obj->objcore->flags & OC_F_PASS));
 
@@ -1067,7 +1084,6 @@ cnt_hit(struct sess *sp)
 	/* Drop our object, we won't need it */
 	(void)HSH_Deref(wrk, NULL, &wrk->obj);
 	wrk->objcore = NULL;
-	wrk->busyobj = NULL;
 
 	switch(sp->handling) {
 	case VCL_RET_PASS:
@@ -1127,6 +1143,7 @@ cnt_lookup(struct sess *sp)
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 
 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+	AZ(wrk->busyobj);
 
 	if (sp->hash_objhead == NULL) {
 		/* Not a waiting list return */
@@ -1256,20 +1273,21 @@ cnt_miss(struct sess *sp)
 	wrk->connect_timeout = 0;
 	wrk->first_byte_timeout = 0;
 	wrk->between_bytes_timeout = 0;
-	CHECK_OBJ_NOTNULL(wrk->busyobj, BUSYOBJ_MAGIC);
 
 	VCL_miss_method(sp);
-	CHECK_OBJ_NOTNULL(wrk->busyobj, BUSYOBJ_MAGIC);
+
 	switch(sp->handling) {
 	case VCL_RET_ERROR:
 		AZ(HSH_Deref(wrk, wrk->objcore, NULL));
 		wrk->objcore = NULL;
 		http_Setup(wrk->bereq, NULL);
+		VBE_DerefBusyObj(&wrk->busyobj);
 		sp->step = STP_ERROR;
 		return (0);
 	case VCL_RET_PASS:
 		AZ(HSH_Deref(wrk, wrk->objcore, NULL));
 		wrk->objcore = NULL;
+		VBE_DerefBusyObj(&wrk->busyobj);
 		sp->step = STP_PASS;
 		return (0);
 	case VCL_RET_FETCH:
@@ -1279,6 +1297,7 @@ cnt_miss(struct sess *sp)
 	case VCL_RET_RESTART:
 		AZ(HSH_Deref(wrk, wrk->objcore, NULL));
 		wrk->objcore = NULL;
+		VBE_DerefBusyObj(&wrk->busyobj);
 		INCOMPL();
 	default:
 		WRONG("Illegal action in vcl_miss{}");
@@ -1327,6 +1346,7 @@ cnt_pass(struct sess *sp)
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
 	AZ(wrk->obj);
+	AZ(wrk->busyobj);
 
 	WS_Reset(wrk->ws, NULL);
 	http_Setup(wrk->bereq, wrk->ws);
@@ -1345,7 +1365,7 @@ cnt_pass(struct sess *sp)
 	wrk->acct_tmp.pass++;
 	sp->sendbody = 1;
 	sp->step = STP_FETCH;
-	New_BusyObj(wrk);
+	wrk->busyobj = VBE_GetBusyObj();
 	return (0);
 }
 
@@ -1433,6 +1453,7 @@ cnt_recv(struct sess *sp)
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
 	AZ(wrk->obj);
+	AZ(wrk->busyobj);
 	assert(wrk->wrw.ciov == wrk->wrw.siov);
 
 	/* By default we use the first backend */
diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c
index 1f1795c..e33cdbe 100644
--- a/bin/varnishd/cache/cache_hash.c
+++ b/bin/varnishd/cache/cache_hash.c
@@ -106,11 +106,6 @@ HSH_Prealloc(const struct sess *sp)
 	}
 	CHECK_OBJ_NOTNULL(w->nwaitinglist, WAITINGLIST_MAGIC);
 
-	if (w->nbusyobj == NULL) {
-		ALLOC_OBJ(w->nbusyobj, BUSYOBJ_MAGIC);
-		XXXAN(w->nbusyobj);
-	}
-
 	if (hash->prep != NULL)
 		hash->prep(sp);
 }
@@ -139,10 +134,10 @@ HSH_Cleanup(struct worker *w)
 		free(w->nhashpriv);
 		w->nhashpriv = NULL;
 	}
-	if (w->nbusyobj != NULL) {
-		FREE_OBJ(w->nbusyobj);
-		w->nbusyobj = NULL;
-	}
+	/* if (w->nbusyobj != NULL) { */
+	/* 	FREE_OBJ(w->nbusyobj); */
+	/* 	w->nbusyobj = NULL; */
+	/* } */
 }
 
 void
@@ -456,7 +451,8 @@ HSH_Lookup(struct sess *sp, struct objhead **poh)
 	AN(oc->flags & OC_F_BUSY);
 	oc->refcnt = 1;
 
-	New_BusyObj(w);
+	AZ(w->busyobj);
+	w->busyobj = VBE_GetBusyObj();
 
 	VRY_Validate(sp->vary_b);
 	if (sp->vary_l != NULL)
@@ -626,8 +622,6 @@ HSH_Unbusy(struct worker *wrk)
 	VTAILQ_REMOVE(&oh->objcs, oc, list);
 	VTAILQ_INSERT_HEAD(&oh->objcs, oc, list);
 	oc->flags &= ~OC_F_BUSY;
-	AZ(wrk->nbusyobj);
-	wrk->nbusyobj = oc->busyobj;
 	oc->busyobj = NULL;
 	if (oh->waitinglist != NULL)
 		hsh_rush(oh);
@@ -716,16 +710,6 @@ HSH_Deref(struct worker *w, struct objcore *oc, struct object **oo)
 	BAN_DestroyObj(oc);
 	AZ(oc->ban);
 
-	if (oc->flags & OC_F_BUSY) {
-		CHECK_OBJ_NOTNULL(oc->busyobj, BUSYOBJ_MAGIC);
-		if (w->nbusyobj == NULL)
-			w->nbusyobj = oc->busyobj;
-		else
-			FREE_OBJ(oc->busyobj);
-		oc->busyobj = NULL;
-	}
-	AZ(oc->busyobj);
-
 	if (oc->methods != NULL) {
 		oc_freeobj(oc);
 		w->stats.n_object--;
diff --git a/bin/varnishd/cache/cache_main.c b/bin/varnishd/cache/cache_main.c
index fd31de4..fbe7624 100644
--- a/bin/varnishd/cache/cache_main.c
+++ b/bin/varnishd/cache/cache_main.c
@@ -118,6 +118,7 @@ child_main(void)
 	HTTP_Init();
 
 	VBE_Init();
+	VBE_InitCfg();
 	VBP_Init();
 	WRK_Init();
 	Pool_Init();
diff --git a/bin/varnishtest/tests/r01068.vtc b/bin/varnishtest/tests/r01068.vtc
new file mode 100644
index 0000000..c055d19
--- /dev/null
+++ b/bin/varnishtest/tests/r01068.vtc
@@ -0,0 +1,24 @@
+varnishtest "Bug 1068 restart on hit in vcl_deliver causes segfault"
+
+server s1 {
+	rxreq
+	txresp
+} -start
+
+varnish v1 -vcl+backend {
+	sub vcl_deliver {
+		if (req.http.x-restart && req.restarts == 0) {
+			return (restart);
+		}
+	}
+} -start
+
+client c1 {
+	txreq
+	rxresp
+	expect resp.status == 200
+
+	txreq -hdr "x-restart: true"
+	rxresp
+	expect resp.status == 200
+} -run
diff --git a/include/tbl/locks.h b/include/tbl/locks.h
index f1b634b..2ad717d 100644
--- a/include/tbl/locks.h
+++ b/include/tbl/locks.h
@@ -49,4 +49,5 @@ LOCK(vbp)
 LOCK(vbe)
 LOCK(backend)
 LOCK(vcapace)
+LOCK(nbusyobj)
 /*lint -restore */



More information about the varnish-commit mailing list