[master] c037d5b Rewrite the epoll waiter to the new reality.
Poul-Henning Kamp
phk at FreeBSD.org
Thu May 28 00:09:10 CEST 2015
commit c037d5b21a345a3eead0be9daaee221523a20a81
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date: Wed May 27 22:08:53 2015 +0000
Rewrite the epoll waiter to the new reality.
diff --git a/bin/varnishd/waiter/cache_waiter_epoll.c b/bin/varnishd/waiter/cache_waiter_epoll.c
index bba3754..192c690 100644
--- a/bin/varnishd/waiter/cache_waiter_epoll.c
+++ b/bin/varnishd/waiter/cache_waiter_epoll.c
@@ -58,83 +58,85 @@ struct vwe {
int epfd;
struct waiter *waiter;
pthread_t thread;
- VTAILQ_HEAD(,waited) list;
- struct lock mtx;
+ double next;
+ int pipe[2];
+ unsigned nwaited;
int die;
+ struct lock mtx;
};
-static void
-vwe_eev(struct vwe *vwe, const struct epoll_event *ep, double now)
-{
- struct waited *wp;
-
- AN(ep->data.ptr);
- CAST_OBJ_NOTNULL(wp, ep->data.ptr, WAITED_MAGIC);
- if (ep->events & EPOLLIN) {
- Wait_Call(vwe->waiter, wp, WAITER_ACTION, now);
- } else if (ep->events & EPOLLERR) {
- Wait_Call(vwe->waiter, wp, WAITER_REMCLOSE, now);
- } else if (ep->events & EPOLLHUP) {
- Wait_Call(vwe->waiter, wp, WAITER_REMCLOSE, now);
- } else if (ep->events & EPOLLRDHUP) {
- Wait_Call(vwe->waiter, wp, WAITER_REMCLOSE, now);
- }
-}
-
/*--------------------------------------------------------------------*/
static void *
vwe_thread(void *priv)
{
struct epoll_event ev[NEEV], *ep;
- struct waited *wp, *wp2;
- double now, idle, last_idle;
+ struct waited *wp;
+ struct waiter *w;
+ double now, then;
int i, n;
struct vwe *vwe;
- VTAILQ_HEAD(,waited) tlist;
+ char c;
CAST_OBJ_NOTNULL(vwe, priv, VWE_MAGIC);
-
+ w = vwe->waiter;
+ CHECK_OBJ_NOTNULL(w, WAITER_MAGIC);
THR_SetName("cache-epoll");
- last_idle = 0.0;
+ now = VTIM_real();
+ Lck_Lock(&vwe->mtx);
while (1) {
- i = floor(.3 * 1e3 * Wait_Tmo(vwe->waiter, NULL));
+ while (1) {
+ /*
+ * XXX: We could avoid many syscalls here if we were
+ * XXX: allowed to just close the fd's on timeout.
+ */
+ then = Wait_HeapDue(w, &wp);
+ if (wp == NULL) {
+ vwe->next = now + 100;
+ break;
+ } else if (then > now) {
+ vwe->next = then;
+ break;
+ }
+ CHECK_OBJ_NOTNULL(wp, WAITED_MAGIC);
+ AZ(epoll_ctl(vwe->epfd, EPOLL_CTL_DEL, wp->fd, NULL));
+ vwe->nwaited--;
+ Wait_Call(w, wp, WAITER_TIMEOUT, now);
+ }
+ then = vwe->next - now;
+ i = (int)floor(1e3 * then);
+ assert(i > 0);
+ Lck_Unlock(&vwe->mtx);
n = epoll_wait(vwe->epfd, ev, NEEV, i);
- if (n < 0 && vwe->die)
- break;
assert(n >= 0);
+ assert(n <= NEEV);
now = VTIM_real();
- for (ep = ev, i = 0; i < n; i++, ep++) {
- CAST_OBJ_NOTNULL(wp, ep->data.ptr, WAITED_MAGIC);
- AZ(epoll_ctl(vwe->epfd, EPOLL_CTL_DEL, wp->fd, NULL));
- Lck_Lock(&vwe->mtx);
- VTAILQ_REMOVE(&vwe->list, wp, list);
- Lck_Unlock(&vwe->mtx);
- vwe_eev(vwe, ep, now);
- }
- idle = now - Wait_Tmo(vwe->waiter, NULL);
- if (now - last_idle < .3 * Wait_Tmo(vwe->waiter, NULL))
- continue;
- last_idle = now;
- VTAILQ_INIT(&tlist);
Lck_Lock(&vwe->mtx);
- VTAILQ_FOREACH_SAFE(wp, &vwe->list, list, wp2) {
- if (wp->idle > idle)
+ for (ep = ev, i = 0; i < n; i++, ep++) {
+ if (ep->data.ptr == vwe) {
+ assert(read(vwe->pipe[0], &c, 1) == 1);
continue;
- VTAILQ_REMOVE(&vwe->list, wp, list);
- VTAILQ_INSERT_TAIL(&tlist, wp, list);
+ }
+ CAST_OBJ_NOTNULL(wp, ep->data.ptr, WAITED_MAGIC);
AZ(epoll_ctl(vwe->epfd, EPOLL_CTL_DEL, wp->fd, NULL));
+ vwe->nwaited--;
+ if (ep->events & EPOLLIN)
+ Wait_Call(w, wp, WAITER_ACTION, now);
+ else if (ep->events & EPOLLERR)
+ Wait_Call(w, wp, WAITER_REMCLOSE, now);
+ else if (ep->events & EPOLLHUP)
+ Wait_Call(w, wp, WAITER_REMCLOSE, now);
+ else
+ Wait_Call(w, wp, WAITER_REMCLOSE, now);
}
- Lck_Unlock(&vwe->mtx);
- while(1) {
- wp = VTAILQ_FIRST(&tlist);
- if (wp == NULL)
- break;
- VTAILQ_REMOVE(&tlist, wp, list);
- Wait_Call(vwe->waiter, wp, WAITER_TIMEOUT, now);
- }
+ if (vwe->nwaited == 0 && vwe->die)
+ break;
}
+ Lck_Unlock(&vwe->mtx);
+ AZ(close(vwe->pipe[0]));
+ AZ(close(vwe->pipe[1]));
+ AZ(close(vwe->epfd));
return (NULL);
}
@@ -150,8 +152,12 @@ vwe_enter(void *priv, struct waited *wp)
ee.events = EPOLLIN | EPOLLRDHUP;
ee.data.ptr = wp;
Lck_Lock(&vwe->mtx);
- VTAILQ_INSERT_TAIL(&vwe->list, wp, list);
+ vwe->nwaited++;
+ Wait_HeapInsert(vwe->waiter, wp);
AZ(epoll_ctl(vwe->epfd, EPOLL_CTL_ADD, wp->fd, &ee));
+ /* If the epoll isn't due before our timeout, poke it via the pipe */
+ if (Wait_When(wp) < vwe->next)
+ assert(write(vwe->pipe[1], "X", 1) == 1);
Lck_Unlock(&vwe->mtx);
return(0);
}
@@ -162,6 +168,7 @@ static void __match_proto__(waiter_init_f)
vwe_init(struct waiter *w)
{
struct vwe *vwe;
+ struct epoll_event ee;
CHECK_OBJ_NOTNULL(w, WAITER_MAGIC);
vwe = w->priv;
@@ -170,8 +177,11 @@ vwe_init(struct waiter *w)
vwe->epfd = epoll_create(1);
assert(vwe->epfd >= 0);
- VTAILQ_INIT(&vwe->list);
Lck_New(&vwe->mtx, lck_misc);
+ AZ(pipe(vwe->pipe));
+ ee.events = EPOLLIN | EPOLLRDHUP;
+ ee.data.ptr = vwe;
+ AZ(epoll_ctl(vwe->epfd, EPOLL_CTL_ADD, vwe->pipe[0], &ee));
AZ(pthread_create(&vwe->thread, NULL, vwe_thread, vwe));
}
@@ -186,20 +196,12 @@ vwe_fini(struct waiter *w)
{
struct vwe *vwe;
void *vp;
- int i;
CAST_OBJ_NOTNULL(vwe, w->priv, VWE_MAGIC);
Lck_Lock(&vwe->mtx);
- while (!VTAILQ_EMPTY(&vwe->list)) {
- Lck_Unlock(&vwe->mtx);
- (void)usleep(100000);
- Lck_Lock(&vwe->mtx);
- }
vwe->die = 1;
- i = vwe->epfd;
- vwe->epfd = -1;
- AZ(close(i));
+ assert(write(vwe->pipe[1], "Y", 1) == 1);
Lck_Unlock(&vwe->mtx);
AZ(pthread_join(vwe->thread, &vp));
Lck_Delete(&vwe->mtx);
diff --git a/bin/varnishd/waiter/mgt_waiter.c b/bin/varnishd/waiter/mgt_waiter.c
index a8c036b..18b3269 100644
--- a/bin/varnishd/waiter/mgt_waiter.c
+++ b/bin/varnishd/waiter/mgt_waiter.c
@@ -40,10 +40,10 @@ static const struct choice waiter_choice[] = {
#if defined(HAVE_KQUEUE)
{ "kqueue", &waiter_kqueue },
#endif
-#if 0
#if defined(HAVE_EPOLL_CTL)
{ "epoll", &waiter_epoll },
#endif
+#if 0
#if defined(HAVE_PORT_CREATE)
{ "ports", &waiter_ports },
#endif
More information about the varnish-commit mailing list