r4497 - in trunk/varnish-cache: bin/varnishd include
phk at projects.linpro.no
phk at projects.linpro.no
Wed Jan 27 17:06:46 CET 2010
Author: phk
Date: 2010-01-27 17:06:46 +0100 (Wed, 27 Jan 2010)
New Revision: 4497
Modified:
trunk/varnish-cache/bin/varnishd/cache_acceptor.c
trunk/varnish-cache/bin/varnishd/cache_hash.c
trunk/varnish-cache/bin/varnishd/heritage.h
trunk/varnish-cache/bin/varnishd/mgt_param.c
trunk/varnish-cache/include/stat_field.h
Log:
The acceptor already has a back-off feature for when we run out of
file descriptors; generalize that concept and use it for all cases
where we cannot accept and/or serve the connection (lack of sessions,
lack of worker threads).
This is controlled by three parameters:
Every time we run into trouble, we increase the sleep-time by:
acceptor_sleep_incr (0.001 s)
But we never let it get above
acceptor_sleep_max (0.050 s)
Once we manage to accept and schedule a connection, we multiply
the sleep-time by:
acceptor_sleep_decay (0.9)
The default numbers are more or less picked out of thin air.
Two new stats counters (accept_fail and client_drop_late), together with
the existing client_drop counter, help us keep track of this:
accept_fail
Counts the cases where accept(2) returns an error. This can be
running out of file descriptors, but also clients which closed
while they were stuck in the accept-queue. Under normal
operation, a minor trickle is probably to be expected.
client_drop
A new connection was dropped because we could not get a
session for it, or because the worker threads were
too busy.
client_drop_late
A previously served connection was dropped for the
same reasons.
As always, feedback welcome.
Modified: trunk/varnish-cache/bin/varnishd/cache_acceptor.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_acceptor.c 2010-01-27 15:59:30 UTC (rev 4496)
+++ trunk/varnish-cache/bin/varnishd/cache_acceptor.c 2010-01-27 16:06:46 UTC (rev 4497)
@@ -187,7 +187,7 @@
struct pollfd *pfd;
struct listen_sock *ls;
unsigned u;
- double now;
+ double now, pace;
THR_SetName("cache-acceptor");
(void)arg;
@@ -207,6 +207,7 @@
}
need_test = 1;
+ pace = 0;
while (1) {
#ifdef SO_SNDTIMEO_WORKS
if (params->send_timeout != tv_sndtimeo.tv_sec) {
@@ -234,6 +235,13 @@
}
}
#endif
+ /* Bound the pacing delay by parameter */
+ if (pace > params->acceptor_sleep_max)
+ pace = params->acceptor_sleep_max;
+ if (pace < params->acceptor_sleep_incr)
+ pace = 0.0;
+ if (pace > 0.0)
+ TIM_sleep(pace);
i = poll(pfd, heritage.nsocks, 1000);
now = TIM_real();
u = 0;
@@ -247,6 +255,7 @@
addr = (void*)&addr_s;
i = accept(ls->sock, addr, &l);
if (i < 0) {
+ VSL_stats->accept_fail++;
switch (errno) {
case EAGAIN:
case ECONNABORTED:
@@ -255,14 +264,13 @@
VSL(SLT_Debug, ls->sock,
"Too many open files "
"when accept(2)ing. Sleeping.");
- TIM_sleep(
- params->accept_fd_holdoff * 0.001);
+ pace += params->acceptor_sleep_incr;
break;
default:
VSL(SLT_Debug, ls->sock,
"Accept failed: %s",
strerror(errno));
- /* XXX: stats ? */
+ pace += params->acceptor_sleep_incr;
break;
}
continue;
@@ -271,6 +279,7 @@
if (sp == NULL) {
AZ(close(i));
VSL_stats->client_drop++;
+ pace += params->acceptor_sleep_incr;
continue;
}
sp->fd = i;
@@ -283,7 +292,12 @@
sp->sockaddrlen = l;
sp->step = STP_FIRST;
- WRK_QueueSession(sp);
+ if (WRK_QueueSession(sp)) {
+ VSL_stats->client_drop++;
+ pace += params->acceptor_sleep_incr;
+ } else {
+ pace *= params->acceptor_sleep_decay;
+ }
}
}
NEEDLESS_RETURN(NULL);
@@ -306,7 +320,8 @@
break;
case 1:
sp->step = STP_START;
- WRK_QueueSession(sp);
+ if (WRK_QueueSession(sp))
+ VSL_stats->client_drop_late++;
break;
default:
INCOMPL();
Modified: trunk/varnish-cache/bin/varnishd/cache_hash.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_hash.c 2010-01-27 15:59:30 UTC (rev 4496)
+++ trunk/varnish-cache/bin/varnishd/cache_hash.c 2010-01-27 16:06:46 UTC (rev 4497)
@@ -463,7 +463,14 @@
AZ(sp->wrk);
VTAILQ_REMOVE(&oh->waitinglist, sp, list);
DSL(0x20, SLT_Debug, sp->id, "off waiting list");
- WRK_QueueSession(sp);
+ if (WRK_QueueSession(sp)) {
+ /*
+ * We could not schedule the session, leave the
+ * rest on the busy list.
+ */
+ VSL_stats->client_drop_late++;
+ break;
+ }
}
}
Modified: trunk/varnish-cache/bin/varnishd/heritage.h
===================================================================
--- trunk/varnish-cache/bin/varnishd/heritage.h 2010-01-27 15:59:30 UTC (rev 4496)
+++ trunk/varnish-cache/bin/varnishd/heritage.h 2010-01-27 16:06:46 UTC (rev 4497)
@@ -186,9 +186,10 @@
/* Acceptable clockskew with backends */
unsigned clock_skew;
- /* Amount of time to sleep when running out of file
- descriptors. In msecs */
- unsigned accept_fd_holdoff;
+ /* Acceptor pacer parameters */
+ double acceptor_sleep_max;
+ double acceptor_sleep_incr;
+ double acceptor_sleep_decay;
/* Get rid of duplicate purges */
unsigned purge_dups;
Modified: trunk/varnish-cache/bin/varnishd/mgt_param.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/mgt_param.c 2010-01-27 15:59:30 UTC (rev 4496)
+++ trunk/varnish-cache/bin/varnishd/mgt_param.c 2010-01-27 16:06:46 UTC (rev 4497)
@@ -132,7 +132,6 @@
cli_out(cli, "%.6f", *dest);
}
-#if 0
/*--------------------------------------------------------------------*/
static void
@@ -163,7 +162,6 @@
} else
cli_out(cli, "%f", *dest);
}
-#endif
/*--------------------------------------------------------------------*/
@@ -703,12 +701,30 @@
"and backend request. This parameter does not apply to pipe.",
0,
"60", "s" },
- { "accept_fd_holdoff", tweak_timeout,
- &master.accept_fd_holdoff, 0, 3600*1000,
- "If we run out of file descriptors, the accept thread will "
- "sleep. This parameter control for how long it will sleep.",
+ { "acceptor_sleep_max", tweak_timeout_double,
+ &master.acceptor_sleep_max, 0, 10,
+ "If we run out of resources, such as file descriptors or "
+ "worker threads, the acceptor will sleep between accepts.\n"
+ "This parameter limits how long it can sleep between "
+ "attempts to accept new connections.",
EXPERIMENTAL,
- "50", "ms" },
+ "0.050", "s" },
+ { "acceptor_sleep_incr", tweak_timeout_double,
+ &master.acceptor_sleep_incr, 0, 1,
+ "If we run out of resources, such as file descriptors or "
+ "worker threads, the acceptor will sleep between accepts.\n"
+ "This parameter control how much longer we sleep, each time "
+ "we fail to accept a new connection.",
+ EXPERIMENTAL,
+ "0.001", "s" },
+ { "acceptor_sleep_decay", tweak_generic_double,
+ &master.acceptor_sleep_decay, 0, 1,
+ "If we run out of resources, such as file descriptors or "
+ "worker threads, the acceptor will sleep between accepts.\n"
+ "This parameter (multiplicatively) reduce the sleep duration "
+ "for each succesfull accept. (ie: 0.9 = reduce by 10%)",
+ EXPERIMENTAL,
+ "0.900", "" },
{ "clock_skew", tweak_uint, &master.clock_skew, 0, UINT_MAX,
"How much clockskew we are willing to accept between the "
"backend and our own clock.",
Modified: trunk/varnish-cache/include/stat_field.h
===================================================================
--- trunk/varnish-cache/include/stat_field.h 2010-01-27 15:59:30 UTC (rev 4496)
+++ trunk/varnish-cache/include/stat_field.h 2010-01-27 16:06:46 UTC (rev 4497)
@@ -33,7 +33,7 @@
*/
MAC_STAT(client_conn, uint64_t, 0, 'a', "Client connections accepted")
-MAC_STAT(client_drop, uint64_t, 0, 'a', "Connection dropped, no sess")
+MAC_STAT(client_drop, uint64_t, 0, 'a', "Connection dropped, no sess/wrk")
MAC_STAT(client_req, uint64_t, 1, 'a', "Client requests received")
MAC_STAT(cache_hit, uint64_t, 1, 'a', "Cache hits")
@@ -149,3 +149,5 @@
MAC_STAT(esi_parse, uint64_t, 0, 'a', "Objects ESI parsed (unlock)")
MAC_STAT(esi_errors, uint64_t, 0, 'a', "ESI parse errors (unlock)")
+MAC_STAT(accept_fail, uint64_t, 0, 'a', "Accept failures")
+MAC_STAT(client_drop_late, uint64_t, 0, 'a', "Connection dropped late")
More information about the varnish-commit
mailing list