r4497 - in trunk/varnish-cache: bin/varnishd include

phk at projects.linpro.no phk at projects.linpro.no
Wed Jan 27 17:06:46 CET 2010


Author: phk
Date: 2010-01-27 17:06:46 +0100 (Wed, 27 Jan 2010)
New Revision: 4497

Modified:
   trunk/varnish-cache/bin/varnishd/cache_acceptor.c
   trunk/varnish-cache/bin/varnishd/cache_hash.c
   trunk/varnish-cache/bin/varnishd/heritage.h
   trunk/varnish-cache/bin/varnishd/mgt_param.c
   trunk/varnish-cache/include/stat_field.h
Log:
The acceptor already has a back-off feature for when we run out of
file descriptors; generalize that concept and use it for all cases
where we cannot accept and/or serve the connection (lack of sessions,
lack of worker threads).

This is controlled by three parameters:

Every time we run into trouble, we increase the sleep-time by:
	acceptor_sleep_incr (0.001 s)

But we never let it get above
	acceptor_sleep_max  (0.050 s)

Once we manage to accept and schedule a connection, we multiply
the sleep-time by:
	acceptor_sleep_decay (0.9)

The default numbers are more or less picked out of thin air.

Two new stats counters (accept_fail and client_drop_late), together with
the existing client_drop counter, help us keep track of this:
	accept_fail
		where accept(2) returns error.  This can be out of
		file-descriptors, but also clients which closed while
		they were stuck in the accept-queue.  Under normal
		operation, a minor trickle is probably to be expected.

	client_drop
		New connection dropped, because we could not get a
		session for it, or because the workerthreads were
		too busy.

	client_drop_late
		A previously served connection was dropped for the
		same reasons.

As always, feedback welcome.




Modified: trunk/varnish-cache/bin/varnishd/cache_acceptor.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_acceptor.c	2010-01-27 15:59:30 UTC (rev 4496)
+++ trunk/varnish-cache/bin/varnishd/cache_acceptor.c	2010-01-27 16:06:46 UTC (rev 4497)
@@ -187,7 +187,7 @@
 	struct pollfd *pfd;
 	struct listen_sock *ls;
 	unsigned u;
-	double now;
+	double now, pace;
 
 	THR_SetName("cache-acceptor");
 	(void)arg;
@@ -207,6 +207,7 @@
 	}
 
 	need_test = 1;
+	pace = 0;
 	while (1) {
 #ifdef SO_SNDTIMEO_WORKS
 		if (params->send_timeout != tv_sndtimeo.tv_sec) {
@@ -234,6 +235,13 @@
 			}
 		}
 #endif
+		/* Bound the pacing delay by parameter */
+		if (pace > params->acceptor_sleep_max)
+			pace = params->acceptor_sleep_max;
+		if (pace < params->acceptor_sleep_incr)
+			pace = 0.0;
+		if (pace > 0.0)
+			TIM_sleep(pace);
 		i = poll(pfd, heritage.nsocks, 1000);
 		now = TIM_real();
 		u = 0;
@@ -247,6 +255,7 @@
 			addr = (void*)&addr_s;
 			i = accept(ls->sock, addr, &l);
 			if (i < 0) {
+				VSL_stats->accept_fail++;
 				switch (errno) {
 				case EAGAIN:
 				case ECONNABORTED:
@@ -255,14 +264,13 @@
 					VSL(SLT_Debug, ls->sock,
 					    "Too many open files "
 					    "when accept(2)ing. Sleeping.");
-					TIM_sleep(
-					    params->accept_fd_holdoff * 0.001);
+					pace += params->acceptor_sleep_incr;
 					break;
 				default:
 					VSL(SLT_Debug, ls->sock,
 					    "Accept failed: %s",
 					    strerror(errno));
-					/* XXX: stats ? */
+					pace += params->acceptor_sleep_incr;
 					break;
 				}
 				continue;
@@ -271,6 +279,7 @@
 			if (sp == NULL) {
 				AZ(close(i));
 				VSL_stats->client_drop++;
+				pace += params->acceptor_sleep_incr;
 				continue;
 			}
 			sp->fd = i;
@@ -283,7 +292,12 @@
 			sp->sockaddrlen = l;
 
 			sp->step = STP_FIRST;
-			WRK_QueueSession(sp);
+			if (WRK_QueueSession(sp)) {
+				VSL_stats->client_drop++;
+				pace += params->acceptor_sleep_incr;
+			} else {
+				pace *= params->acceptor_sleep_decay;
+			}
 		}
 	}
 	NEEDLESS_RETURN(NULL);
@@ -306,7 +320,8 @@
 		break;
 	case 1:
 		sp->step = STP_START;
-		WRK_QueueSession(sp);
+		if (WRK_QueueSession(sp))
+			VSL_stats->client_drop_late++;
 		break;
 	default:
 		INCOMPL();

Modified: trunk/varnish-cache/bin/varnishd/cache_hash.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_hash.c	2010-01-27 15:59:30 UTC (rev 4496)
+++ trunk/varnish-cache/bin/varnishd/cache_hash.c	2010-01-27 16:06:46 UTC (rev 4497)
@@ -463,7 +463,14 @@
 		AZ(sp->wrk);
 		VTAILQ_REMOVE(&oh->waitinglist, sp, list);
 		DSL(0x20, SLT_Debug, sp->id, "off waiting list");
-		WRK_QueueSession(sp);
+		if (WRK_QueueSession(sp)) {
+			/*
+			 * We could not schedule the session, leave the
+			 * rest on the busy list.
+			 */
+			VSL_stats->client_drop_late++;
+			break;
+		}
 	}
 }
 

Modified: trunk/varnish-cache/bin/varnishd/heritage.h
===================================================================
--- trunk/varnish-cache/bin/varnishd/heritage.h	2010-01-27 15:59:30 UTC (rev 4496)
+++ trunk/varnish-cache/bin/varnishd/heritage.h	2010-01-27 16:06:46 UTC (rev 4497)
@@ -186,9 +186,10 @@
 	/* Acceptable clockskew with backends */
 	unsigned		clock_skew;
 
-	/* Amount of time to sleep when running out of file
-	   descriptors.  In msecs */
-	unsigned		accept_fd_holdoff;
+	/* Acceptor pacer parameters */
+	double			acceptor_sleep_max;
+	double			acceptor_sleep_incr;
+	double			acceptor_sleep_decay;
 
 	/* Get rid of duplicate purges */
 	unsigned		purge_dups;

Modified: trunk/varnish-cache/bin/varnishd/mgt_param.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/mgt_param.c	2010-01-27 15:59:30 UTC (rev 4496)
+++ trunk/varnish-cache/bin/varnishd/mgt_param.c	2010-01-27 16:06:46 UTC (rev 4497)
@@ -132,7 +132,6 @@
 		cli_out(cli, "%.6f", *dest);
 }
 
-#if 0
 /*--------------------------------------------------------------------*/
 
 static void
@@ -163,7 +162,6 @@
 	} else
 		cli_out(cli, "%f", *dest);
 }
-#endif
 
 /*--------------------------------------------------------------------*/
 
@@ -703,12 +701,30 @@
 		"and backend request. This parameter does not apply to pipe.",
 		0,
 		"60", "s" },
-	{ "accept_fd_holdoff", tweak_timeout,
-		&master.accept_fd_holdoff, 0,  3600*1000,
-		"If we run out of file descriptors, the accept thread will "
-		"sleep.  This parameter control for how long it will sleep.",
+	{ "acceptor_sleep_max", tweak_timeout_double,
+		&master.acceptor_sleep_max, 0,  10,
+		"If we run out of resources, such as file descriptors or "
+		"worker threads, the acceptor will sleep between accepts.\n"
+		"This parameter limits how long it can sleep between "
+		"attempts to accept new connections.",
 		EXPERIMENTAL,
-		"50", "ms" },
+		"0.050", "s" },
+	{ "acceptor_sleep_incr", tweak_timeout_double,
+		&master.acceptor_sleep_incr, 0,  1,
+		"If we run out of resources, such as file descriptors or "
+		"worker threads, the acceptor will sleep between accepts.\n"
+		"This parameter control how much longer we sleep, each time "
+		"we fail to accept a new connection.",
+		EXPERIMENTAL,
+		"0.001", "s" },
+	{ "acceptor_sleep_decay", tweak_generic_double,
+		&master.acceptor_sleep_decay, 0,  1,
+		"If we run out of resources, such as file descriptors or "
+		"worker threads, the acceptor will sleep between accepts.\n"
+		"This parameter (multiplicatively) reduce the sleep duration "
+		"for each succesfull accept. (ie: 0.9 = reduce by 10%)",
+		EXPERIMENTAL,
+		"0.900", "" },
 	{ "clock_skew", tweak_uint, &master.clock_skew, 0, UINT_MAX,
 		"How much clockskew we are willing to accept between the "
 		"backend and our own clock.",

Modified: trunk/varnish-cache/include/stat_field.h
===================================================================
--- trunk/varnish-cache/include/stat_field.h	2010-01-27 15:59:30 UTC (rev 4496)
+++ trunk/varnish-cache/include/stat_field.h	2010-01-27 16:06:46 UTC (rev 4497)
@@ -33,7 +33,7 @@
  */
 
 MAC_STAT(client_conn,		uint64_t, 0, 'a', "Client connections accepted")
-MAC_STAT(client_drop,		uint64_t, 0, 'a', "Connection dropped, no sess")
+MAC_STAT(client_drop,		uint64_t, 0, 'a', "Connection dropped, no sess/wrk")
 MAC_STAT(client_req,		uint64_t, 1, 'a', "Client requests received")
 
 MAC_STAT(cache_hit,		uint64_t, 1, 'a', "Cache hits")
@@ -149,3 +149,5 @@
 
 MAC_STAT(esi_parse,		uint64_t, 0, 'a', "Objects ESI parsed (unlock)")
 MAC_STAT(esi_errors,		uint64_t, 0, 'a', "ESI parse errors (unlock)")
+MAC_STAT(accept_fail,		uint64_t, 0, 'a', "Accept failures")
+MAC_STAT(client_drop_late,	uint64_t, 0, 'a', "Connection dropped late")



More information about the varnish-commit mailing list