[master] ffea64d Shave 256 bytes of the worker thread footprint, by using the thread workspace for WRW also.

Mon Feb 13 21:31:29 CET 2012

commit ffea64d751612a6584fd21f0d39a9574c55d18dd
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date:   Mon Feb 13 20:30:48 2012 +0000

    Shave 256 bytes of the worker thread footprint, by using the thread
    workspace for WRW also.

diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h
index 1371d48..4b1bfc3 100644
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@ -114,6 +114,7 @@ struct vrt_backend;
 struct vsb;
 struct waitinglist;
 struct worker;
+struct wrw;
 
 #define DIGEST_LEN		32
 
@@ -244,19 +245,6 @@ struct exp {
 
 /*--------------------------------------------------------------------*/
 
-struct wrw {
-	int			*wfd;
-	unsigned		werr;	/* valid after WRW_Flush() */
-	struct iovec		*iov;
-	unsigned		siov;
-	unsigned		niov;
-	ssize_t			liov;
-	ssize_t			cliov;
-	unsigned		ciov;	/* Chunked header marker */
-};
-
-/*--------------------------------------------------------------------*/
-
 struct wrk_accept {
 	unsigned		magic;
 #define WRK_ACCEPT_MAGIC	0x8c4b4d59
@@ -301,7 +289,7 @@ struct worker {
 
 	double			lastused;
 
-	struct wrw		wrw;
+	struct wrw		*wrw;
 
 	pthread_cond_t		cond;
 
@@ -312,10 +300,6 @@ struct worker {
 	uint32_t		*wlb, *wlp, *wle;
 	unsigned		wlr;
 
-	/*
-	 * In practice this workspace is only used for wrk_accept now
-	 * but it might come handy later, so keep it around.  For now.
-	 */
 	struct ws		aws[1];
 
 	struct busyobj		*busyobj;
@@ -893,7 +877,7 @@ void Pool_Init(void);
 void Pool_Work_Thread(void *priv, struct worker *w);
 int Pool_Task(struct pool *pp, struct pool_task *task, enum pool_how how);
 
-#define WRW_IsReleased(w)	((w)->wrw.wfd == NULL)
+#define WRW_IsReleased(w)	((w)->wrw == NULL)
 int WRW_Error(const struct worker *w);
 void WRW_Chunked(struct worker *w);
 void WRW_EndChunk(struct worker *w);
diff --git a/bin/varnishd/cache/cache_center.c b/bin/varnishd/cache/cache_center.c
index 137f198..345d0cf 100644
--- a/bin/varnishd/cache/cache_center.c
+++ b/bin/varnishd/cache/cache_center.c
@@ -338,7 +338,6 @@ cnt_deliver(struct sess *sp, struct worker *wrk, struct req *req)
 	RES_WriteObj(sp);
 
 	assert(WRW_IsReleased(wrk));
-	assert(wrk->wrw.ciov == wrk->wrw.siov);
 	(void)HSH_Deref(wrk, NULL, &req->obj);
 	http_Setup(req->resp, NULL);
 	sp->step = STP_DONE;
@@ -974,7 +973,6 @@ cnt_streambody(struct sess *sp, struct worker *wrk, struct req *req)
 	RES_StreamEnd(sp);
 
 	assert(WRW_IsReleased(wrk));
-	assert(wrk->wrw.ciov == wrk->wrw.siov);
 	(void)HSH_Deref(wrk, NULL, &req->obj);
 	VBO_DerefBusyObj(wrk, &wrk->busyobj);
 	http_Setup(req->resp, NULL);
@@ -1411,7 +1409,6 @@ cnt_recv(struct sess *sp, struct worker *wrk, struct req *req)
 	CHECK_OBJ_NOTNULL(req->vcl, VCL_CONF_MAGIC);
 	AZ(req->obj);
 	AZ(wrk->busyobj);
-	assert(wrk->wrw.ciov == wrk->wrw.siov);
 
 	/* By default we use the first backend */
 	AZ(req->director);
diff --git a/bin/varnishd/cache/cache_cli.c b/bin/varnishd/cache/cache_cli.c
index dd76c91..a2927fa 100644
--- a/bin/varnishd/cache/cache_cli.c
+++ b/bin/varnishd/cache/cache_cli.c
@@ -138,7 +138,6 @@ cli_debug_sizeof(struct cli *cli, const char * const *av, void *priv)
 	SZOF(struct vbc);
 	SZOF(struct VSC_C_main);
 	SZOF(struct lock);
-	SZOF(struct wrw);
 	SZOF(struct dstat);
 #if 0
 #define OFOF(foo, bar)	{ foo __foo; VCLI_Out(cli, \
diff --git a/bin/varnishd/cache/cache_session.c b/bin/varnishd/cache/cache_session.c
index 07b5cfb..ea0668b 100644
--- a/bin/varnishd/cache/cache_session.c
+++ b/bin/varnishd/cache/cache_session.c
@@ -155,7 +155,7 @@ ses_pool_task(struct worker *wrk, void *arg)
 	wrk->sp = NULL;
 	WS_Assert(wrk->aws);
 	AZ(wrk->busyobj);
-	AZ(wrk->wrw.wfd);
+	AZ(wrk->wrw);
 	assert(wrk->wlp == wrk->wlb);
 	if (cache_param->diag_bitmap & 0x00040000) {
 		if (wrk->vcl != NULL)
diff --git a/bin/varnishd/cache/cache_wrk.c b/bin/varnishd/cache/cache_wrk.c
index 98de83f..2f0b575 100644
--- a/bin/varnishd/cache/cache_wrk.c
+++ b/bin/varnishd/cache/cache_wrk.c
@@ -131,14 +131,11 @@ WRK_BgThread(pthread_t *thr, const char *name, bgthread_t *func, void *priv)
 /*--------------------------------------------------------------------*/
 
 static void *
-wrk_thread_real(void *priv, unsigned shm_workspace, unsigned thread_workspace,
-    unsigned siov)
+wrk_thread_real(void *priv, unsigned shm_workspace, unsigned thread_workspace)
 {
 	struct worker *w, ww;
 	uint32_t wlog[shm_workspace / 4];
-	/* XXX: can we trust these to be properly aligned ? */
 	unsigned char ws[thread_workspace];
-	struct iovec iov[siov];
 
 	THR_SetName("cache-worker");
 	w = &ww;
@@ -147,9 +144,6 @@ wrk_thread_real(void *priv, unsigned shm_workspace, unsigned thread_workspace,
 	w->lastused = NAN;
 	w->wlb = w->wlp = wlog;
 	w->wle = wlog + (sizeof wlog) / 4;
-	w->wrw.iov = iov;
-	w->wrw.siov = siov;
-	w->wrw.ciov = siov;
 	AZ(pthread_cond_init(&w->cond, NULL));
 
 	WS_Init(w->aws, "wrk", ws, thread_workspace);
@@ -173,18 +167,10 @@ wrk_thread_real(void *priv, unsigned shm_workspace, unsigned thread_workspace,
 void *
 WRK_thread(void *priv)
 {
-	uint16_t nhttp;
-	unsigned siov;
-
-	assert(cache_param->http_max_hdr <= 65535);
-	/* We need to snapshot these two for consistency */
-	nhttp = (uint16_t)cache_param->http_max_hdr;
-	siov = nhttp * 2;
-	if (siov > IOV_MAX)
-		siov = IOV_MAX;
+
 	return (wrk_thread_real(priv,
 	    cache_param->shm_workspace,
-	    cache_param->workspace_thread, siov));
+	    cache_param->workspace_thread));
 }
 
 void
diff --git a/bin/varnishd/cache/cache_wrw.c b/bin/varnishd/cache/cache_wrw.c
index 4f9bbc5..2c0d7c3 100644
--- a/bin/varnishd/cache/cache_wrw.c
+++ b/bin/varnishd/cache/cache_wrw.c
@@ -42,6 +42,21 @@
 #include "cache.h"
 #include "vtim.h"
 
+/*--------------------------------------------------------------------*/
+
+struct wrw {
+	unsigned		magic;
+#define WRW_MAGIC		0x2f2142e5
+	int			*wfd;
+	unsigned		werr;	/* valid after WRW_Flush() */
+	struct iovec		*iov;
+	unsigned		siov;
+	unsigned		niov;
+	ssize_t			liov;
+	ssize_t			cliov;
+	unsigned		ciov;	/* Chunked header marker */
+};
+
 /*--------------------------------------------------------------------
  */
 
@@ -49,22 +64,32 @@ int
 WRW_Error(const struct worker *wrk)
 {
 
-	return (wrk->wrw.werr);
+	return (wrk->wrw->werr);
 }
 
 void
 WRW_Reserve(struct worker *wrk, int *fd)
 {
 	struct wrw *wrw;
+	unsigned u;
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	wrw = &wrk->wrw;
-	AZ(wrw->wfd);
+	AZ(wrk->wrw);
+	wrw = (void*)WS_Alloc(wrk->aws, sizeof *wrw);
+	AN(wrw);
+	memset(wrw, 0, sizeof *wrw);
+	wrw->magic = WRW_MAGIC;
+	u = WS_Reserve(wrk->aws, 0);
+	u /= sizeof(struct iovec);
+	AN(u);
+	wrw->iov = (void*)wrk->aws->f;
+	wrw->siov = u;
+	wrw->ciov = u;
 	wrw->werr = 0;
 	wrw->liov = 0;
 	wrw->niov = 0;
-	wrw->ciov = wrw->siov;
 	wrw->wfd = fd;
+	wrk->wrw = wrw;
 }
 
 static void
@@ -73,13 +98,11 @@ WRW_Release(struct worker *wrk)
 	struct wrw *wrw;
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	wrw = &wrk->wrw;
-	AN(wrw->wfd);
-	wrw->werr = 0;
-	wrw->liov = 0;
-	wrw->niov = 0;
-	wrw->ciov = wrw->siov;
-	wrw->wfd = NULL;
+	wrw = wrk->wrw;
+	wrk->wrw = NULL;
+	CHECK_OBJ_NOTNULL(wrw, WRW_MAGIC);
+	WS_Release(wrk->aws, 0);
+	WS_Reset(wrk->aws, NULL);
 }
 
 static void
@@ -114,7 +137,8 @@ WRW_Flush(struct worker *wrk)
 	char cbuf[32];
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	wrw = &wrk->wrw;
+	wrw = wrk->wrw;
+	CHECK_OBJ_NOTNULL(wrw, WRW_MAGIC);
 	AN(wrw->wfd);
 
 	/* For chunked, there must be one slot reserved for the chunked tail */
@@ -186,7 +210,7 @@ WRW_FlushRelease(struct worker *wrk)
 	unsigned u;
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	AN(wrk->wrw.wfd);
+	AN(wrk->wrw->wfd);
 	u = WRW_Flush(wrk);
 	WRW_Release(wrk);
 	return (u);
@@ -198,7 +222,7 @@ WRW_WriteH(struct worker *wrk, const txt *hh, const char *suf)
 	unsigned u;
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	AN(wrk->wrw.wfd);
+	AN(wrk->wrw->wfd);
 	AN(wrk);
 	AN(hh);
 	AN(hh->b);
@@ -215,7 +239,8 @@ WRW_Write(struct worker *wrk, const void *ptr, int len)
 	struct wrw *wrw;
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	wrw = &wrk->wrw;
+	wrw = wrk->wrw;
+	CHECK_OBJ_NOTNULL(wrw, WRW_MAGIC);
 	AN(wrw->wfd);
 	if (len == 0 || *wrw->wfd < 0)
 		return (0);
@@ -240,7 +265,8 @@ WRW_Chunked(struct worker *wrk)
 	struct wrw *wrw;
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	wrw = &wrk->wrw;
+	wrw = wrk->wrw;
+	CHECK_OBJ_NOTNULL(wrw, WRW_MAGIC);
 
 	assert(wrw->ciov == wrw->siov);
 	/*
@@ -268,7 +294,8 @@ WRW_EndChunk(struct worker *wrk)
 	struct wrw *wrw;
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	wrw = &wrk->wrw;
+	wrw = wrk->wrw;
+	CHECK_OBJ_NOTNULL(wrw, WRW_MAGIC);
 
 	assert(wrw->ciov < wrw->siov);
 	(void)WRW_Flush(wrk);
diff --git a/bin/varnishd/mgt/mgt_param.c b/bin/varnishd/mgt/mgt_param.c
index 97492ed..e2e1a3a 100644
--- a/bin/varnishd/mgt/mgt_param.c
+++ b/bin/varnishd/mgt/mgt_param.c
@@ -712,12 +712,16 @@ static const struct parspec input_parspec[] = {
 		DELAYED_EFFECT,
 		"64k", "bytes" },
 	{ "workspace_thread",
-		tweak_bytes_u, &mgt_param.workspace_thread, 256, 256,
-		"Bytes of auxillary workspace per thread."
-		/* XXX: See comment in cache.h */
-		"This is not the workspace you are looking for.",
+		tweak_bytes_u, &mgt_param.workspace_thread, 256, 8192,
+		"Bytes of auxillary workspace per thread.\n"
+		"This workspace is used for certain temporary data structures"
+		" during the operation of a worker thread.\n"
+		"One use is for the io-vectors for writing requests and"
+		" responses to sockets, having too little space will"
+		" result in more writev(2) system calls, having too much"
+		" just wastes the space.\n",
 		DELAYED_EFFECT,
-		"256", "bytes" },
+		"2048", "bytes" },
 	{ "http_req_hdr_len",
 		tweak_bytes_u, &mgt_param.http_req_hdr_len,
 		40, UINT_MAX,