r2514 - trunk/varnish-cache/bin/varnishd

phk at projects.linpro.no phk at projects.linpro.no
Tue Feb 19 12:10:12 CET 2008


Author: phk
Date: 2008-02-19 12:10:12 +0100 (Tue, 19 Feb 2008)
New Revision: 2514

Modified:
   trunk/varnish-cache/bin/varnishd/cache_acceptor_kqueue.c
Log:
Revise the kqueue_acceptor with a sledgehammer.

Use EV_ONESHOT for all session events to make the kernel delete the
event when it fires, to remove any chance of any race with session
state and kqueue event arming.

For sessions with acceptfilter, this does just what we want (and I
kick myself for not realizing this much sooner).

For sessions where the acceptfilter is not enabled or has given up,
this results in an extra kevent arming operation for each read, but
this is still a much lower overhead than synchrnously deleting
the event when before passing the session on.

Delete all the workarounds and band-aids that had accumulated.

All in all, a win-win-win situation.

I have no idea how many tickets this will close.



Modified: trunk/varnish-cache/bin/varnishd/cache_acceptor_kqueue.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_acceptor_kqueue.c	2008-02-18 17:04:30 UTC (rev 2513)
+++ trunk/varnish-cache/bin/varnishd/cache_acceptor_kqueue.c	2008-02-19 11:10:12 UTC (rev 2514)
@@ -51,88 +51,13 @@
 #include "cache.h"
 #include "cache_acceptor.h"
 
-/**********************************************************************
- * Generic bitmap functions, may be generalized at some point.
- */
 
-#define VBITMAP_TYPE	unsigned	/* Our preferred wordsize */
-#define VBITMAP_LUMP	(32*1024)	/* How many bits we alloc at a time */
-#define VBITMAP_WORD	(sizeof(VBITMAP_TYPE) * 8)
-#define VBITMAP_IDX(n)	(n / VBITMAP_WORD)
-#define VBITMAP_BIT(n)	(1U << (n % VBITMAP_WORD))
-
-struct vbitmap {
-	VBITMAP_TYPE	*bits;
-	unsigned	nbits;
-};
-
-static void
-vbit_expand(struct vbitmap *vb, unsigned bit)
-{
-	unsigned char *p;
-
-	bit += VBITMAP_LUMP - 1;
-	bit -= (bit % VBITMAP_LUMP);
-	VSL(SLT_Debug, 0, "Expanding KQ VBIT to %u", bit);
-	p = realloc(vb->bits, bit / 8);
-	AN(p);
-	memset(p + vb->nbits / 8, 0, (bit - vb->nbits) / 8);
-	vb->bits = (void*)p;
-	vb->nbits = bit;
-}
-
-static struct vbitmap *
-vbit_init(unsigned initial)
-{
-	struct vbitmap *vb;
-
-	vb = calloc(sizeof *vb, 1);
-	AN(vb);
-	if (initial == 0) {
-#ifdef HAVE_GETDTABLESIZE
-		initial = getdtablesize();
-#else
-		initial = VBITMAP_LUMP;
-#endif
-	}
-	vbit_expand(vb, initial);
-	return (vb);
-}
-
-static void
-vbit_set(struct vbitmap *vb, unsigned bit)
-{
-
-	if (bit >= vb->nbits)
-		vbit_expand(vb, bit);
-	vb->bits[VBITMAP_IDX(bit)] |= VBITMAP_BIT(bit);
-}
-
-static void
-vbit_clr(struct vbitmap *vb, unsigned bit)
-{
-
-	if (bit >= vb->nbits)
-		vbit_expand(vb, bit);
-	vb->bits[VBITMAP_IDX(bit)] &= ~VBITMAP_BIT(bit);
-}
-
-static int
-vbit_test(struct vbitmap *vb, unsigned bit)
-{
-
-	if (bit >= vb->nbits)
-		vbit_expand(vb, bit);
-	return (vb->bits[VBITMAP_IDX(bit)] & VBITMAP_BIT(bit));
-}
-
 /**********************************************************************/
 
 
 static pthread_t vca_kqueue_thread;
 static int kq = -1;
 
-static struct vbitmap *vca_kqueue_bits;
 
 static VTAILQ_HEAD(,sess) sesshead = VTAILQ_HEAD_INITIALIZER(sesshead);
 
@@ -149,21 +74,7 @@
 	if (nki == 0)
 		return;
 	i = kevent(kq, ki, nki, NULL, 0, NULL);
-	assert(i <= 0);
-	if (i < 0) {
-		/* 
-		 * We do not push kevents into the kernel before 
-		 * passing the session off to a worker thread, so
-		 * by the time we get around to delete the event
-		 * the fd may be closed and we get an ENOENT back
-		 * once we do flush.
-		 * We can get EBADF the same way if the client closes
-		 * on us.  In that case, we get no kevent on that
-		 * socket, but the TAILQ still has it, and it will
-		 * be GC'ed there after the timeout.
-		 */
-		assert(errno == ENOENT || errno == EBADF);
-	}
+	assert(i == 0);
 	nki = 0;
 }
 
@@ -172,8 +83,9 @@
 {
 
 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
-	if (sp->fd < 0)
-		return;
+	assert(sp->fd >= 0);
+	if (params->diag_bitmap & 4)
+		VSL(SLT_Debug, sp->fd, "KQ: EV_SET sp %p arm %x", sp, arm);
 	EV_SET(&ki[nki], sp->fd, EVFILT_READ, arm, 0, 0, sp);
 	if (++nki == NKEV) 
 		vca_kq_flush();
@@ -196,52 +108,40 @@
 			CHECK_OBJ_NOTNULL(ss[j], SESS_MAGIC);
 			assert(ss[j]->fd >= 0);
 			AZ(ss[j]->obj);
-			AZ(vbit_test(vca_kqueue_bits, ss[j]->fd));
-			vbit_set(vca_kqueue_bits, ss[j]->fd);
 			VTAILQ_INSERT_TAIL(&sesshead, ss[j], list);
-			vca_kq_sess(ss[j], EV_ADD);
+			vca_kq_sess(ss[j], EV_ADD | EV_ONESHOT);
 			j++;
 			i -= sizeof ss[0];
 		}
 		assert(i == 0);
 		return;
 	}
-	if (!vbit_test(vca_kqueue_bits, kp->ident)) {
-		if (params->diag_bitmap & 0x4)
-			VSL(SLT_Debug, kp->ident,
-			    "KQ: not my fd %d, sp %p kev data %lu flags 0x%x%s",
-			    kp->ident, kp->udata, (unsigned long)kp->data,
-			    kp->flags, (kp->flags & EV_EOF) ? " EOF" : "");
-		return;
-	}
 	CAST_OBJ_NOTNULL(sp, kp->udata, SESS_MAGIC);
 	if (params->diag_bitmap & 0x4)
-		VSL(SLT_Debug, sp->id, "sp %p kev data %lu flags 0x%x%s",
+		VSL(SLT_Debug, sp->id, "KQ: sp %p kev data %lu flags 0x%x%s",
 		    sp, (unsigned long)kp->data, kp->flags,
 		    (kp->flags & EV_EOF) ? " EOF" : "");
-	if (sp->fd == -1 || kp->fflags == 0) {
-		if (params->diag_bitmap & 0x4)
-			VSL(SLT_Debug, sp->id, "KQ: got event 0x%04x on fd %d",
-			    kp->fflags, kp->ident);
-		return;
-	}
+
 	spassert(sp->id == kp->ident);
 	spassert(sp->fd == sp->id);
 	if (kp->data > 0) {
 		i = HTC_Rx(sp->htc);
-		if (i == 0)
+		if (i == 0) {
+			vca_kq_sess(sp, EV_ADD | EV_ONESHOT);
 			return;	/* more needed */
-		vbit_clr(vca_kqueue_bits, sp->fd);
+		}
 		VTAILQ_REMOVE(&sesshead, sp, list);
-		vca_kq_sess(sp, EV_DELETE);
 		vca_handover(sp, i);
 		return;
 	} else if (kp->flags == EV_EOF) {
-		vbit_clr(vca_kqueue_bits, sp->fd);
 		VTAILQ_REMOVE(&sesshead, sp, list);
 		vca_close_session(sp, "EOF");
 		SES_Delete(sp);
 		return;
+	} else {
+		VSL(SLT_Debug, sp->id, "KQ: sp %p kev data %lu flags 0x%x%s",
+		    sp, (unsigned long)kp->data, kp->flags,
+		    (kp->flags & EV_EOF) ? " EOF" : "");
 	}
 }
 
@@ -273,10 +173,6 @@
 		assert(n >= 1 && n <= NKEV);
 		nki = 0;
 		for (kp = ke, j = 0; j < n; j++, kp++) {
-			if (kp->flags & EV_ERROR) {
-				/* See comment in vca_kq_sess() */
-				continue;
-			}
 			if (kp->filter == EVFILT_TIMER) {
 				dotimer = 1;
 				continue;
@@ -301,7 +197,6 @@
 				break;
 			if (sp->t_open > deadline)
 				break;
-			vbit_clr(vca_kqueue_bits, sp->fd);
 			VTAILQ_REMOVE(&sesshead, sp, list);
 			vca_close_session(sp, "timeout");
 			SES_Delete(sp);
@@ -320,7 +215,6 @@
 	i |= O_NONBLOCK;
 	i = fcntl(vca_pipes[0], F_SETFL, i);
 
-	vca_kqueue_bits = vbit_init(0);
 	AZ(pthread_create(&vca_kqueue_thread, NULL, vca_kqueue_main, NULL));
 }
 




More information about the varnish-commit mailing list